Skip to content

DEPR: DTA(float_data, dtype=dt64tz) #49361

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ Removal of prior version deprecations/changes
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
- Changed the behavior of the :class:`Series` constructor; it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)
- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` constructors with floating-dtype data and a :class:`DatetimeTZDtype`; the data are now interpreted as UTC-times instead of wall-times, consistent with how integer-dtype data are treated (:issue:`45573`)
- Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`)
- Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included. Manually cast to ``bool`` dtype first (:issue:`46188`)
- Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :meth:`Series.transform` and :meth:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`)
Expand Down
23 changes: 3 additions & 20 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2230,27 +2230,10 @@ def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
return data, copy

if is_float_dtype(data.dtype):
# Note: we must cast to datetime64[ns] here in order to treat these
# as wall-times instead of UTC timestamps.
data = data.astype(DT64NS_DTYPE)
# pre-2.0 we treated these as wall-times, inconsistent with ints
# GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes
data = data.astype(np.int64)
copy = False
if (
tz is not None
and len(data) > 0
and not timezones.is_utc(timezones.maybe_get_tz(tz))
):
# GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes
warnings.warn(
"The behavior of DatetimeArray._from_sequence with a timezone-aware "
"dtype and floating-dtype data is deprecated. In a future version, "
"this data will be interpreted as nanosecond UTC timestamps "
"instead of wall-times, matching the behavior with integer dtypes. "
"To retain the old behavior, explicitly cast to 'datetime64[ns]' "
"before passing the data to pandas. To get the future behavior, "
"first cast to 'int64'.",
FutureWarning,
stacklevel=find_stack_level(),
)

elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype):
# GH#29794 enforcing deprecation introduced in GH#23539
Expand Down
10 changes: 4 additions & 6 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,11 +426,9 @@ def test_array_equivalent(dtype_equal):
dtype_equal=dtype_equal,
)

msg = "will be interpreted as nanosecond UTC timestamps instead of wall-times"
with tm.assert_produces_warning(FutureWarning, match=msg):
dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern")
dti2 = DatetimeIndex([0, np.nan], tz="CET")
dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern")
dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern")
dti2 = DatetimeIndex([0, np.nan], tz="CET")
dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern")

assert array_equivalent(
dti1,
Expand All @@ -444,7 +442,7 @@ def test_array_equivalent(dtype_equal):
)
# The rest are not dtype_equal
assert not array_equivalent(DatetimeIndex([0, np.nan]), dti1)
assert not array_equivalent(
assert array_equivalent(
dti2,
dti1,
)
Expand Down
20 changes: 1 addition & 19 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,10 +396,7 @@ def test_astype_nan_to_bool(self):
def test_astype_ea_to_datetimetzdtype(self, dtype):
# GH37553
ser = Series([4, 0, 9], dtype=dtype)
warn = FutureWarning if ser.dtype.kind == "f" else None
msg = "with a timezone-aware dtype and floating-dtype data"
with tm.assert_produces_warning(warn, match=msg):
result = ser.astype(DatetimeTZDtype(tz="US/Pacific"))
result = ser.astype(DatetimeTZDtype(tz="US/Pacific"))

expected = Series(
{
Expand All @@ -409,21 +406,6 @@ def test_astype_ea_to_datetimetzdtype(self, dtype):
}
)

if dtype in tm.FLOAT_EA_DTYPES:
expected = Series(
{
0: Timestamp(
"1970-01-01 00:00:00.000000004-08:00", tz="US/Pacific"
),
1: Timestamp(
"1970-01-01 00:00:00.000000000-08:00", tz="US/Pacific"
),
2: Timestamp(
"1970-01-01 00:00:00.000000009-08:00", tz="US/Pacific"
),
}
)

tm.assert_series_equal(result, expected)

def test_astype_retain_Attrs(self, any_numpy_dtype):
Expand Down