diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 4ac737bb6b29a..6f3602b1d0202 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -148,6 +148,8 @@ Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Enforced deprecation disallowing passing a timezone-aware :class:`Timestamp` and ``dtype="datetime64[ns]"`` to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`) - Enforced deprecation disallowing passing a sequence of timezone-aware values and ``dtype="datetime64[ns]"`` to to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`) +- Enforced deprecation disallowing using ``.astype`` to convert a ``datetime64[ns]`` :class:`Series`, :class:`DataFrame`, or :class:`DatetimeIndex` to a timezone-aware dtype; use ``obj.tz_localize`` or ``ser.dt.tz_localize`` instead (:issue:`39258`) +- Enforced deprecation disallowing using ``.astype`` to convert a timezone-aware :class:`Series`, :class:`DataFrame`, or :class:`DatetimeIndex` to timezone-naive ``datetime64[ns]`` dtype; use ``obj.tz_localize(None)`` or ``obj.tz_convert("UTC").tz_localize(None)`` instead (:issue:`39258`) - Removed Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` (:issue:`24518`) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ca0a745c180e9..19ef100f24f1b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -56,14 +56,12 @@ from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_inclusive -from pandas.core.dtypes.astype import astype_dt64_to_dt64tz from pandas.core.dtypes.common import ( DT64NS_DTYPE, INT64_DTYPE, is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype, - 
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, @@ -660,15 +658,29 @@ def astype(self, dtype, copy: bool = True): return type(self)._simple_new(res_values, dtype=res_values.dtype) # TODO: preserve freq? - elif is_datetime64_ns_dtype(dtype): - return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False) - elif self.tz is not None and isinstance(dtype, DatetimeTZDtype): # tzaware unit conversion e.g. datetime64[s, UTC] np_dtype = np.dtype(dtype.str) res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy) - return type(self)._simple_new(res_values, dtype=dtype) - # TODO: preserve freq? + return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq) + + elif self.tz is None and isinstance(dtype, DatetimeTZDtype): + # pre-2.0 this did self.tz_localize(dtype.tz), which did not match + # the Series behavior + raise TypeError( + "Cannot use .astype to convert from timezone-naive dtype to " + "timezone-aware dtype. Use obj.tz_localize instead." + ) + + elif self.tz is not None and is_datetime64_dtype(dtype): + # pre-2.0 behavior for DTA/DTI was + # values.tz_convert("UTC").tz_localize(None), which did not match + # the Series behavior + raise TypeError( + "Cannot use .astype to convert from timezone-aware dtype to " + "timezone-naive dtype. Use obj.tz_localize(None) or " + "obj.tz_convert('UTC').tz_localize(None) instead." 
+ ) elif ( self.tz is None diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index ad3dc0a876e00..718badc2e4085 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -7,10 +7,8 @@ import inspect from typing import ( TYPE_CHECKING, - cast, overload, ) -import warnings import numpy as np @@ -27,7 +25,6 @@ IgnoreRaise, ) from pandas.errors import IntCastingNaNError -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_datetime64_dtype, @@ -39,17 +36,13 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( - DatetimeTZDtype, ExtensionDtype, PandasDtype, ) from pandas.core.dtypes.missing import isna if TYPE_CHECKING: - from pandas.core.arrays import ( - DatetimeArray, - ExtensionArray, - ) + from pandas.core.arrays import ExtensionArray _dtype_obj = np.dtype(object) @@ -227,7 +220,13 @@ def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> Arra raise TypeError(msg) if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype): - return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True) + # Series.astype behavior pre-2.0 did + # values.tz_localize("UTC").tz_convert(dtype.tz) + # which did not match the DTA/DTI behavior. + raise TypeError( + "Cannot use .astype to convert from timezone-naive dtype to " + "timezone-aware dtype. Use ser.dt.tz_localize instead." + ) if is_dtype_equal(values.dtype, dtype): if copy: @@ -351,80 +350,3 @@ def astype_td64_unit_conversion( mask = isna(values) np.putmask(result, mask, np.nan) return result - - -def astype_dt64_to_dt64tz( - values: ArrayLike, dtype: DtypeObj, copy: bool, via_utc: bool = False -) -> DatetimeArray: - # GH#33401 we have inconsistent behaviors between - # Datetimeindex[naive].astype(tzaware) - # Series[dt64].astype(tzaware) - # This collects them in one place to prevent further fragmentation. 
- - from pandas.core.construction import ensure_wrapped_if_datetimelike - - values = ensure_wrapped_if_datetimelike(values) - values = cast("DatetimeArray", values) - aware = isinstance(dtype, DatetimeTZDtype) - - if via_utc: - # Series.astype behavior - - # caller is responsible for checking this - assert values.tz is None and aware - dtype = cast(DatetimeTZDtype, dtype) - - if copy: - # this should be the only copy - values = values.copy() - - warnings.warn( - "Using .astype to convert from timezone-naive dtype to " - "timezone-aware dtype is deprecated and will raise in a " - "future version. Use ser.dt.tz_localize instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - # GH#33401 this doesn't match DatetimeArray.astype, which - # goes through the `not via_utc` path - return values.tz_localize("UTC").tz_convert(dtype.tz) - - else: - # DatetimeArray/DatetimeIndex.astype behavior - if values.tz is None and aware: - dtype = cast(DatetimeTZDtype, dtype) - warnings.warn( - "Using .astype to convert from timezone-naive dtype to " - "timezone-aware dtype is deprecated and will raise in a " - "future version. Use obj.tz_localize instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - return values.tz_localize(dtype.tz) - - elif aware: - # GH#18951: datetime64_tz dtype but not equal means different tz - dtype = cast(DatetimeTZDtype, dtype) - result = values.tz_convert(dtype.tz) - if copy: - result = result.copy() - return result - - elif values.tz is not None: - warnings.warn( - "Using .astype to convert from timezone-aware dtype to " - "timezone-naive dtype is deprecated and will raise in a " - "future version. 
Use obj.tz_localize(None) or " - "obj.tz_convert('UTC').tz_localize(None) instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - - result = values.tz_convert("UTC").tz_localize(None) - if copy: - result = result.copy() - return result - - raise NotImplementedError("dtype_equal case should be handled elsewhere") diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index b27d90e43d860..24779c6e0c89d 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -364,15 +364,22 @@ def test_astype_copies(self, dtype, other): ser = pd.Series([1, 2], dtype=dtype) orig = ser.copy() - warn = None + err = False if (dtype == "datetime64[ns]") ^ (other == "datetime64[ns]"): # deprecated in favor of tz_localize - warn = FutureWarning - - with tm.assert_produces_warning(warn): + err = True + + if err: + if dtype == "datetime64[ns]": + msg = "Use ser.dt.tz_localize instead" + else: + msg = "from timezone-aware dtype to timezone-naive dtype" + with pytest.raises(TypeError, match=msg): + ser.astype(other) + else: t = ser.astype(other) - t[:] = pd.NaT - tm.assert_series_equal(ser, orig) + t[:] = pd.NaT + tm.assert_series_equal(ser, orig) @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index bebc44505f02a..69087b6822f2e 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -611,27 +611,10 @@ def test_astype_dt64tz(self, timezone_frame): result = timezone_frame.astype(object) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): + msg = "Cannot use .astype to convert from timezone-aware dtype to timezone-" + with pytest.raises(TypeError, match=msg): # dt64tz->dt64 deprecated - result = timezone_frame.astype("datetime64[ns]") - expected = DataFrame( - { 
- "A": date_range("20130101", periods=3), - "B": ( - date_range("20130101", periods=3, tz="US/Eastern") - .tz_convert("UTC") - .tz_localize(None) - ), - "C": ( - date_range("20130101", periods=3, tz="CET") - .tz_convert("UTC") - .tz_localize(None) - ), - } - ) - expected.iloc[1, 1] = NaT - expected.iloc[1, 2] = NaT - tm.assert_frame_equal(result, expected) + timezone_frame.astype("datetime64[ns]") def test_astype_dt64tz_to_str(self, timezone_frame): # str formatting diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py index e7823f0c90b1a..a9a35f26d58a3 100644 --- a/pandas/tests/indexes/datetimes/methods/test_astype.py +++ b/pandas/tests/indexes/datetimes/methods/test_astype.py @@ -62,20 +62,14 @@ def test_astype_with_tz(self): # with tz rng = date_range("1/1/2000", periods=10, tz="US/Eastern") - with tm.assert_produces_warning(FutureWarning): + msg = "Cannot use .astype to convert from timezone-aware" + with pytest.raises(TypeError, match=msg): # deprecated - result = rng.astype("datetime64[ns]") - with tm.assert_produces_warning(FutureWarning): + rng.astype("datetime64[ns]") + with pytest.raises(TypeError, match=msg): # check DatetimeArray while we're here deprecated rng._data.astype("datetime64[ns]") - expected = ( - date_range("1/1/2000", periods=10, tz="US/Eastern") - .tz_convert("UTC") - .tz_localize(None) - ) - tm.assert_index_equal(result, expected) - def test_astype_tzaware_to_tzaware(self): # GH 18951: tz-aware to tz-aware idx = date_range("20170101", periods=4, tz="US/Pacific") @@ -88,17 +82,14 @@ def test_astype_tznaive_to_tzaware(self): # GH 18951: tz-naive to tz-aware idx = date_range("20170101", periods=4) idx = idx._with_freq(None) # tz_localize does not preserve freq - with tm.assert_produces_warning(FutureWarning): + msg = "Cannot use .astype to convert from timezone-naive" + with pytest.raises(TypeError, match=msg): # dt64->dt64tz deprecated - result = idx.astype("datetime64[ns, 
US/Eastern]") - with tm.assert_produces_warning(FutureWarning): + idx.astype("datetime64[ns, US/Eastern]") + with pytest.raises(TypeError, match=msg): # dt64->dt64tz deprecated idx._data.astype("datetime64[ns, US/Eastern]") - expected = date_range("20170101", periods=4, tz="US/Eastern") - expected = expected._with_freq(None) - tm.assert_index_equal(result, expected) - def test_astype_str_nat(self): # GH 13149, GH 13209 # verify that we are returning NaT as a string (and not unicode) @@ -171,15 +162,10 @@ def test_astype_datetime64(self): assert result is idx idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST", name="idx") - with tm.assert_produces_warning(FutureWarning): + msg = "Cannot use .astype to convert from timezone-aware" + with pytest.raises(TypeError, match=msg): # dt64tz->dt64 deprecated result = idx_tz.astype("datetime64[ns]") - expected = DatetimeIndex( - ["2016-05-16 05:00:00", "NaT", "NaT", "NaT"], - dtype="datetime64[ns]", - name="idx", - ) - tm.assert_index_equal(result, expected) def test_astype_object(self): rng = date_range("1/1/2000", periods=20) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 4b0821a50e09b..ff08b72b4a10d 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -244,8 +244,9 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass): index = index.tz_localize(tz_naive_fixture) dtype = index.dtype - warn = None if tz_naive_fixture is None else FutureWarning - # astype dt64 -> dt64tz deprecated + # As of 2.0 astype raises on dt64.astype(dt64tz) + err = tz_naive_fixture is not None + msg = "Cannot use .astype to convert from timezone-naive dtype to" if attr == "asi8": result = DatetimeIndex(arg).tz_localize(tz_naive_fixture) @@ -254,11 +255,15 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass): tm.assert_index_equal(result, index) if attr == "asi8": - with tm.assert_produces_warning(warn): + if err: 
+ with pytest.raises(TypeError, match=msg): + DatetimeIndex(arg).astype(dtype) + else: result = DatetimeIndex(arg).astype(dtype) + tm.assert_index_equal(result, index) else: result = klass(arg, dtype=dtype) - tm.assert_index_equal(result, index) + tm.assert_index_equal(result, index) if attr == "asi8": result = DatetimeIndex(list(arg)).tz_localize(tz_naive_fixture) @@ -267,11 +272,15 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass): tm.assert_index_equal(result, index) if attr == "asi8": - with tm.assert_produces_warning(warn): + if err: + with pytest.raises(TypeError, match=msg): + DatetimeIndex(list(arg)).astype(dtype) + else: result = DatetimeIndex(list(arg)).astype(dtype) + tm.assert_index_equal(result, index) else: result = klass(list(arg), dtype=dtype) - tm.assert_index_equal(result, index) + tm.assert_index_equal(result, index) @pytest.mark.parametrize("attr", ["values", "asi8"]) @pytest.mark.parametrize("klass", [Index, TimedeltaIndex]) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 498225307b52e..9b57f0f634a6c 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -211,15 +211,14 @@ def test_astype_datetime64tz(self): tm.assert_series_equal(result, expected) # astype - datetime64[ns, tz] - with tm.assert_produces_warning(FutureWarning): + msg = "Cannot use .astype to convert from timezone-naive" + with pytest.raises(TypeError, match=msg): # dt64->dt64tz astype deprecated - result = Series(ser.values).astype("datetime64[ns, US/Eastern]") - tm.assert_series_equal(result, ser) + Series(ser.values).astype("datetime64[ns, US/Eastern]") - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match=msg): # dt64->dt64tz astype deprecated - result = Series(ser.values).astype(ser.dtype) - tm.assert_series_equal(result, ser) + Series(ser.values).astype(ser.dtype) result = ser.astype("datetime64[ns, 
CET]") expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET")) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 0fd508b08f1db..dd28f28f89bcb 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -155,18 +155,19 @@ class TestSeriesConvertDtypes: def test_convert_dtypes( self, data, maindtype, params, expected_default, expected_other ): - warn = None if ( hasattr(data, "dtype") and data.dtype == "M8[ns]" and isinstance(maindtype, pd.DatetimeTZDtype) ): # this astype is deprecated in favor of tz_localize - warn = FutureWarning + msg = "Cannot use .astype to convert from timezone-naive dtype" + with pytest.raises(TypeError, match=msg): + pd.Series(data, dtype=maindtype) + return if maindtype is not None: - with tm.assert_produces_warning(warn): - series = pd.Series(data, dtype=maindtype) + series = pd.Series(data, dtype=maindtype) else: series = pd.Series(data)