Skip to content

DEPR: enforce DatetimeArray.astype deprecations #49235

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ Removal of prior version deprecations/changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Enforced deprecation disallowing passing a timezone-aware :class:`Timestamp` and ``dtype="datetime64[ns]"`` to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`)
- Enforced deprecation disallowing passing a sequence of timezone-aware values and ``dtype="datetime64[ns]"`` to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`)
- Enforced deprecation disallowing using ``.astype`` to convert a ``datetime64[ns]`` :class:`Series`, :class:`DataFrame`, or :class:`DatetimeIndex` to timezone-aware dtype, use ``obj.tz_localize`` or ``ser.dt.tz_localize`` instead (:issue:`39258`)
- Enforced deprecation disallowing using ``.astype`` to convert a timezone-aware :class:`Series`, :class:`DataFrame`, or :class:`DatetimeIndex` to timezone-naive ``datetime64[ns]`` dtype, use ``obj.tz_localize(None)`` or ``obj.tz_convert("UTC").tz_localize(None)`` instead (:issue:`39258`)
- Removed Date parser functions :func:`~pandas.io.date_converters.parse_date_time`,
:func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields`
and :func:`~pandas.io.date_converters.generic_parser` (:issue:`24518`)
Expand Down
26 changes: 19 additions & 7 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,12 @@
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_inclusive

from pandas.core.dtypes.astype import astype_dt64_to_dt64tz
from pandas.core.dtypes.common import (
DT64NS_DTYPE,
INT64_DTYPE,
is_bool_dtype,
is_datetime64_any_dtype,
is_datetime64_dtype,
is_datetime64_ns_dtype,
is_datetime64tz_dtype,
is_dtype_equal,
is_extension_array_dtype,
Expand Down Expand Up @@ -660,15 +658,29 @@ def astype(self, dtype, copy: bool = True):
return type(self)._simple_new(res_values, dtype=res_values.dtype)
# TODO: preserve freq?

elif is_datetime64_ns_dtype(dtype):
return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)

elif self.tz is not None and isinstance(dtype, DatetimeTZDtype):
# tzaware unit conversion e.g. datetime64[s, UTC]
np_dtype = np.dtype(dtype.str)
res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
return type(self)._simple_new(res_values, dtype=dtype)
# TODO: preserve freq?
return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq)

elif self.tz is None and isinstance(dtype, DatetimeTZDtype):
# pre-2.0 this did self.tz_localize(dtype.tz), which did not match
# the Series behavior
raise TypeError(
"Cannot use .astype to convert from timezone-naive dtype to "
"timezone-aware dtype. Use obj.tz_localize instead."
)

elif self.tz is not None and is_datetime64_dtype(dtype):
# pre-2.0 behavior for DTA/DTI was
# values.tz_convert("UTC").tz_localize(None), which did not match
# the Series behavior
raise TypeError(
"Cannot use .astype to convert from timezone-aware dtype to "
"timezone-naive dtype. Use obj.tz_localize(None) or "
"obj.tz_convert('UTC').tz_localize(None) instead."
)

elif (
self.tz is None
Expand Down
94 changes: 8 additions & 86 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
import inspect
from typing import (
TYPE_CHECKING,
cast,
overload,
)
import warnings

import numpy as np

Expand All @@ -27,7 +25,6 @@
IgnoreRaise,
)
from pandas.errors import IntCastingNaNError
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_datetime64_dtype,
Expand All @@ -39,17 +36,13 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
ExtensionDtype,
PandasDtype,
)
from pandas.core.dtypes.missing import isna

if TYPE_CHECKING:
from pandas.core.arrays import (
DatetimeArray,
ExtensionArray,
)
from pandas.core.arrays import ExtensionArray


_dtype_obj = np.dtype(object)
Expand Down Expand Up @@ -227,7 +220,13 @@ def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> Arra
raise TypeError(msg)

if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)
# Series.astype behavior pre-2.0 did
# values.tz_localize("UTC").tz_convert(dtype.tz)
# which did not match the DTA/DTI behavior.
raise TypeError(
"Cannot use .astype to convert from timezone-naive dtype to "
"timezone-aware dtype. Use ser.dt.tz_localize instead."
)

if is_dtype_equal(values.dtype, dtype):
if copy:
Expand Down Expand Up @@ -351,80 +350,3 @@ def astype_td64_unit_conversion(
mask = isna(values)
np.putmask(result, mask, np.nan)
return result


def astype_dt64_to_dt64tz(
    values: ArrayLike, dtype: DtypeObj, copy: bool, via_utc: bool = False
) -> DatetimeArray:
    """
    Cast datetime64 values between timezone-naive and timezone-aware dtypes.

    GH#33401: ``DatetimeIndex[naive].astype(tzaware)`` and
    ``Series[dt64].astype(tzaware)`` behave inconsistently; both behaviors
    are gathered here to prevent further fragmentation.

    Parameters
    ----------
    values : ArrayLike
        Wrapped via ``ensure_wrapped_if_datetimelike`` before use.
    dtype : DtypeObj
        Target dtype; counts as "aware" when it is a ``DatetimeTZDtype``.
    copy : bool
        Whether to copy; not consulted on the naive -> aware localize path
        taken when ``via_utc`` is False.
    via_utc : bool, default False
        True selects the Series.astype behavior (localize to UTC, then
        convert to ``dtype.tz``); False selects the
        DatetimeArray/DatetimeIndex.astype behavior.

    Returns
    -------
    DatetimeArray

    Raises
    ------
    NotImplementedError
        If ``via_utc`` is False and neither ``values`` nor ``dtype`` is
        timezone-aware.

    Warns
    -----
    FutureWarning
        On naive -> aware and on aware -> naive conversions.
    """
    from pandas.core.construction import ensure_wrapped_if_datetimelike

    dta = cast("DatetimeArray", ensure_wrapped_if_datetimelike(values))
    target_is_aware = isinstance(dtype, DatetimeTZDtype)

    if via_utc:
        # Series.astype behavior; the caller is responsible for having
        # checked that this is a naive -> aware cast.
        assert dta.tz is None and target_is_aware
        dtype = cast(DatetimeTZDtype, dtype)

        # this should be the only copy
        dta = dta.copy() if copy else dta

        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version. Use ser.dt.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        # GH#33401 this doesn't match DatetimeArray.astype, which
        # goes through the `not via_utc` path
        as_utc = dta.tz_localize("UTC")
        return as_utc.tz_convert(dtype.tz)

    # Everything below is the DatetimeArray/DatetimeIndex.astype behavior.
    if target_is_aware and dta.tz is None:
        dtype = cast(DatetimeTZDtype, dtype)
        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version. Use obj.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return dta.tz_localize(dtype.tz)

    if target_is_aware:
        # GH#18951: datetime64_tz dtype but not equal means different tz
        dtype = cast(DatetimeTZDtype, dtype)
        converted = dta.tz_convert(dtype.tz)
        return converted.copy() if copy else converted

    if dta.tz is not None:
        warnings.warn(
            "Using .astype to convert from timezone-aware dtype to "
            "timezone-naive dtype is deprecated and will raise in a "
            "future version. Use obj.tz_localize(None) or "
            "obj.tz_convert('UTC').tz_localize(None) instead",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        stripped = dta.tz_convert("UTC").tz_localize(None)
        return stripped.copy() if copy else stripped

    raise NotImplementedError("dtype_equal case should be handled elsewhere")
19 changes: 13 additions & 6 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,15 +364,22 @@ def test_astype_copies(self, dtype, other):
ser = pd.Series([1, 2], dtype=dtype)
orig = ser.copy()

warn = None
err = False
if (dtype == "datetime64[ns]") ^ (other == "datetime64[ns]"):
# deprecated in favor of tz_localize
warn = FutureWarning

with tm.assert_produces_warning(warn):
err = True

if err:
if dtype == "datetime64[ns]":
msg = "Use ser.dt.tz_localize instead"
else:
msg = "from timezone-aware dtype to timezone-naive dtype"
with pytest.raises(TypeError, match=msg):
ser.astype(other)
else:
t = ser.astype(other)
t[:] = pd.NaT
tm.assert_series_equal(ser, orig)
t[:] = pd.NaT
tm.assert_series_equal(ser, orig)

@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype_int(self, dtype):
Expand Down
23 changes: 3 additions & 20 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,27 +611,10 @@ def test_astype_dt64tz(self, timezone_frame):
result = timezone_frame.astype(object)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning):
msg = "Cannot use .astype to convert from timezone-aware dtype to timezone-"
with pytest.raises(TypeError, match=msg):
# dt64tz->dt64 deprecated
result = timezone_frame.astype("datetime64[ns]")
expected = DataFrame(
{
"A": date_range("20130101", periods=3),
"B": (
date_range("20130101", periods=3, tz="US/Eastern")
.tz_convert("UTC")
.tz_localize(None)
),
"C": (
date_range("20130101", periods=3, tz="CET")
.tz_convert("UTC")
.tz_localize(None)
),
}
)
expected.iloc[1, 1] = NaT
expected.iloc[1, 2] = NaT
tm.assert_frame_equal(result, expected)
timezone_frame.astype("datetime64[ns]")

def test_astype_dt64tz_to_str(self, timezone_frame):
# str formatting
Expand Down
34 changes: 10 additions & 24 deletions pandas/tests/indexes/datetimes/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,14 @@ def test_astype_with_tz(self):

# with tz
rng = date_range("1/1/2000", periods=10, tz="US/Eastern")
with tm.assert_produces_warning(FutureWarning):
msg = "Cannot use .astype to convert from timezone-aware"
with pytest.raises(TypeError, match=msg):
# deprecated
result = rng.astype("datetime64[ns]")
with tm.assert_produces_warning(FutureWarning):
rng.astype("datetime64[ns]")
with pytest.raises(TypeError, match=msg):
# check DatetimeArray while we're here deprecated
rng._data.astype("datetime64[ns]")

expected = (
date_range("1/1/2000", periods=10, tz="US/Eastern")
.tz_convert("UTC")
.tz_localize(None)
)
tm.assert_index_equal(result, expected)

def test_astype_tzaware_to_tzaware(self):
# GH 18951: tz-aware to tz-aware
idx = date_range("20170101", periods=4, tz="US/Pacific")
Expand All @@ -88,17 +82,14 @@ def test_astype_tznaive_to_tzaware(self):
# GH 18951: tz-naive to tz-aware
idx = date_range("20170101", periods=4)
idx = idx._with_freq(None) # tz_localize does not preserve freq
with tm.assert_produces_warning(FutureWarning):
msg = "Cannot use .astype to convert from timezone-naive"
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz deprecated
result = idx.astype("datetime64[ns, US/Eastern]")
with tm.assert_produces_warning(FutureWarning):
idx.astype("datetime64[ns, US/Eastern]")
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz deprecated
idx._data.astype("datetime64[ns, US/Eastern]")

expected = date_range("20170101", periods=4, tz="US/Eastern")
expected = expected._with_freq(None)
tm.assert_index_equal(result, expected)

def test_astype_str_nat(self):
# GH 13149, GH 13209
# verify that we are returning NaT as a string (and not unicode)
Expand Down Expand Up @@ -171,15 +162,10 @@ def test_astype_datetime64(self):
assert result is idx

idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST", name="idx")
with tm.assert_produces_warning(FutureWarning):
msg = "Cannot use .astype to convert from timezone-aware"
with pytest.raises(TypeError, match=msg):
# dt64tz->dt64 deprecated
result = idx_tz.astype("datetime64[ns]")
expected = DatetimeIndex(
["2016-05-16 05:00:00", "NaT", "NaT", "NaT"],
dtype="datetime64[ns]",
name="idx",
)
tm.assert_index_equal(result, expected)

def test_astype_object(self):
rng = date_range("1/1/2000", periods=20)
Expand Down
21 changes: 15 additions & 6 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,9 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
index = index.tz_localize(tz_naive_fixture)
dtype = index.dtype

warn = None if tz_naive_fixture is None else FutureWarning
# astype dt64 -> dt64tz deprecated
# As of 2.0 astype raises on dt64.astype(dt64tz)
err = tz_naive_fixture is not None
msg = "Cannot use .astype to convert from timezone-naive dtype to"

if attr == "asi8":
result = DatetimeIndex(arg).tz_localize(tz_naive_fixture)
Expand All @@ -254,11 +255,15 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
tm.assert_index_equal(result, index)

if attr == "asi8":
with tm.assert_produces_warning(warn):
if err:
with pytest.raises(TypeError, match=msg):
DatetimeIndex(arg).astype(dtype)
else:
result = DatetimeIndex(arg).astype(dtype)
tm.assert_index_equal(result, index)
else:
result = klass(arg, dtype=dtype)
tm.assert_index_equal(result, index)
tm.assert_index_equal(result, index)

if attr == "asi8":
result = DatetimeIndex(list(arg)).tz_localize(tz_naive_fixture)
Expand All @@ -267,11 +272,15 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
tm.assert_index_equal(result, index)

if attr == "asi8":
with tm.assert_produces_warning(warn):
if err:
with pytest.raises(TypeError, match=msg):
DatetimeIndex(list(arg)).astype(dtype)
else:
result = DatetimeIndex(list(arg)).astype(dtype)
tm.assert_index_equal(result, index)
else:
result = klass(list(arg), dtype=dtype)
tm.assert_index_equal(result, index)
tm.assert_index_equal(result, index)

@pytest.mark.parametrize("attr", ["values", "asi8"])
@pytest.mark.parametrize("klass", [Index, TimedeltaIndex])
Expand Down
11 changes: 5 additions & 6 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,14 @@ def test_astype_datetime64tz(self):
tm.assert_series_equal(result, expected)

# astype - datetime64[ns, tz]
with tm.assert_produces_warning(FutureWarning):
msg = "Cannot use .astype to convert from timezone-naive"
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz astype deprecated
result = Series(ser.values).astype("datetime64[ns, US/Eastern]")
tm.assert_series_equal(result, ser)
Series(ser.values).astype("datetime64[ns, US/Eastern]")

with tm.assert_produces_warning(FutureWarning):
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz astype deprecated
result = Series(ser.values).astype(ser.dtype)
tm.assert_series_equal(result, ser)
Series(ser.values).astype(ser.dtype)

result = ser.astype("datetime64[ns, CET]")
expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
Expand Down
Loading