Skip to content

DEPR: treating float data as wall-times in DTA._from_sequence #45573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 19, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ Other Deprecations
- Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`)
- Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`)
- Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`)
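- Deprecated treating float-dtype data as wall-times when passed with a timezone to :class:`Series` or :class:`DatetimeIndex`; in a future version such data will be interpreted as nanosecond UTC timestamps, matching the behavior with integer dtypes (:issue:`45573`)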
-


Expand Down
24 changes: 20 additions & 4 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2063,7 +2063,7 @@ def _sequence_to_dt64ns(
inferred_freq = data.freq

# By this point we are assured to have either a numpy array or Index
data, copy = maybe_convert_dtype(data, copy)
data, copy = maybe_convert_dtype(data, copy, tz=tz)
data_dtype = getattr(data, "dtype", None)

if (
Expand Down Expand Up @@ -2240,7 +2240,7 @@ def objects_to_datetime64ns(
raise TypeError(result)


def maybe_convert_dtype(data, copy: bool):
def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
"""
Convert data based on dtype conventions, issuing deprecation warnings
or errors where appropriate.
Expand All @@ -2249,6 +2249,7 @@ def maybe_convert_dtype(data, copy: bool):
----------
data : np.ndarray or pd.Index
copy : bool
tz : tzinfo or None, default None

Returns
-------
Expand All @@ -2268,8 +2269,23 @@ def maybe_convert_dtype(data, copy: bool):
# as wall-times instead of UTC timestamps.
data = data.astype(DT64NS_DTYPE)
copy = False
# TODO: deprecate this behavior to instead treat symmetrically
# with integer dtypes. See discussion in GH#23675
if (
tz is not None
and len(data) > 0
and not timezones.is_utc(timezones.maybe_get_tz(tz))
):
# GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes
warnings.warn(
"The behavior of DatetimeArray._from_sequence with a timezone-aware "
"dtype and floating-dtype data is deprecated. In a future version, "
"this data will be interpreted as nanosecond UTC timestamps "
"instead of wall-times, matching the behavior with integer dtypes. "
"To retain the old behavior, explicitly cast to 'datetime64[ns]' "
"before passing the data to pandas. To get the future behavior, "
"first cast to 'int64'.",
FutureWarning,
stacklevel=find_stack_level(),
)

elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype):
# GH#29794 enforcing deprecation introduced in GH#23539
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
iNaT,
nat_strings,
parsing,
timezones,
)
from pandas._libs.tslibs.parsing import ( # noqa:F401
DateParseError,
Expand Down Expand Up @@ -364,7 +365,7 @@ def _convert_listlike_datetimes(
# NB: this must come after unit transformation
orig_arg = arg
try:
arg, _ = maybe_convert_dtype(arg, copy=False)
arg, _ = maybe_convert_dtype(arg, copy=False, tz=timezones.maybe_get_tz(tz))
except TypeError:
if errors == "coerce":
npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
Expand Down
23 changes: 14 additions & 9 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,23 +415,28 @@ def test_array_equivalent(dtype_equal):
TimedeltaIndex([1, np.nan]),
dtype_equal=dtype_equal,
)

msg = "will be interpreted as nanosecond UTC timestamps instead of wall-times"
with tm.assert_produces_warning(FutureWarning, match=msg):
dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern")
dti2 = DatetimeIndex([0, np.nan], tz="CET")
dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern")

assert array_equivalent(
DatetimeIndex([0, np.nan], tz="US/Eastern"),
DatetimeIndex([0, np.nan], tz="US/Eastern"),
dti1,
dti1,
dtype_equal=dtype_equal,
)
assert not array_equivalent(
DatetimeIndex([0, np.nan], tz="US/Eastern"),
DatetimeIndex([1, np.nan], tz="US/Eastern"),
dti1,
dti3,
dtype_equal=dtype_equal,
)
# The rest are not dtype_equal
assert not array_equivalent(DatetimeIndex([0, np.nan]), dti1)
assert not array_equivalent(
DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan], tz="US/Eastern")
)
assert not array_equivalent(
DatetimeIndex([0, np.nan], tz="CET"),
DatetimeIndex([0, np.nan], tz="US/Eastern"),
dti2,
dti1,
)

assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan]))
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def create_block(typestr, placement, item_shape=None, num_offset=0, maker=new_bl
assert m is not None, f"incompatible typestr -> {typestr}"
tz = m.groups()[0]
assert num_items == 1, "must have only 1 num items for a tz-aware"
values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)._data
values = DatetimeIndex(np.arange(N) * 10 ** 9, tz=tz)._data
values = ensure_block_shape(values, ndim=len(shape))
elif typestr in ("timedelta", "td", "m8[ns]"):
values = (mat * 1).astype("m8[ns]")
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,12 @@ def test_astype_nan_to_bool(self):
)
def test_astype_ea_to_datetimetzdtype(self, dtype):
# GH37553
result = Series([4, 0, 9], dtype=dtype).astype(DatetimeTZDtype(tz="US/Pacific"))
ser = Series([4, 0, 9], dtype=dtype)
warn = FutureWarning if ser.dtype.kind == "f" else None
msg = "with a timezone-aware dtype and floating-dtype data"
with tm.assert_produces_warning(warn, match=msg):
result = ser.astype(DatetimeTZDtype(tz="US/Pacific"))

expected = Series(
{
0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"),
Expand Down