diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 1050eed40fbb4..be4a7f6390e37 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -165,6 +165,7 @@ Removal of prior version deprecations/changes - Removed deprecated :meth:`Index.is_mixed`, check ``index.inferred_type`` directly instead (:issue:`32922`) - Removed deprecated :func:`pandas.api.types.is_categorical`; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`) - Removed deprecated :meth:`Index.asi8` (:issue:`37877`) +- Enforced deprecation changing behavior when passing ``datetime64[ns]`` dtype data and timezone-aware dtype to :class:`Series`, interpreting the values as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`) - Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`) - Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead (:issue:`36418`) - Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 07d689d737c87..8395d54224f1d 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2123,10 +2123,15 @@ def _sequence_to_dt64ns( # Convert tz-naive to UTC # TODO: if tz is UTC, are there situations where we *don't* want a # copy? tz_localize_to_utc always makes one. + shape = data.shape + if data.ndim > 1: + data = data.ravel() + data = tzconversion.tz_localize_to_utc( data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit ) data = data.view(new_dtype) + data = data.reshape(shape) assert data.dtype == new_dtype, data.dtype result = data diff --git a/pandas/core/construction.py b/pandas/core/construction.py index b7db95269439c..447006572f22d 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -55,10 +55,7 @@ is_object_dtype, is_timedelta64_ns_dtype, ) -from pandas.core.dtypes.dtypes import ( - DatetimeTZDtype, - PandasDtype, -) +from pandas.core.dtypes.dtypes import PandasDtype from pandas.core.dtypes.generic import ( ABCExtensionArray, ABCIndex, @@ -800,16 +797,6 @@ def _try_cast( elif isinstance(dtype, ExtensionDtype): # create an extension array from its dtype - if isinstance(dtype, DatetimeTZDtype): - # We can't go through _from_sequence because it handles dt64naive - # data differently; _from_sequence treats naive as wall times, - # while maybe_cast_to_datetime treats it as UTC - # see test_maybe_promote_any_numpy_dtype_with_datetimetz - # TODO(2.0): with deprecations enforced, should be able to remove - # special case. - return maybe_cast_to_datetime(arr, dtype) - # TODO: copy? - array_type = dtype.construct_array_type()._from_sequence subarr = array_type(arr, dtype=dtype, copy=copy) return subarr diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9830d22f3e2e5..ec313f91d2721 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -65,7 +65,6 @@ is_complex, is_complex_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, is_float, @@ -1314,13 +1313,15 @@ def try_timedelta(v: np.ndarray) -> np.ndarray: def maybe_cast_to_datetime( - value: ExtensionArray | np.ndarray | list, dtype: DtypeObj | None + value: ExtensionArray | np.ndarray | list, dtype: np.dtype | None ) -> ExtensionArray | np.ndarray: """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT We allow a list *only* when dtype is not None. + + Caller is responsible for handling ExtensionDtype cases. """ from pandas.core.arrays.datetimes import sequence_to_datetimes from pandas.core.arrays.timedeltas import TimedeltaArray @@ -1332,18 +1333,22 @@ def maybe_cast_to_datetime( # TODO: _from_sequence would raise ValueError in cases where # _ensure_nanosecond_dtype raises TypeError dtype = cast(np.dtype, dtype) - dtype = _ensure_nanosecond_dtype(dtype) + # Incompatible types in assignment (expression has type "Union[dtype[Any], + # ExtensionDtype]", variable has type "Optional[dtype[Any]]") + dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment] res = TimedeltaArray._from_sequence(value, dtype=dtype) return res if dtype is not None: is_datetime64 = is_datetime64_dtype(dtype) - is_datetime64tz = is_datetime64tz_dtype(dtype) vdtype = getattr(value, "dtype", None) - if is_datetime64 or is_datetime64tz: - dtype = _ensure_nanosecond_dtype(dtype) + if is_datetime64: + # Incompatible types in assignment (expression has type + # "Union[dtype[Any], ExtensionDtype]", variable has type + # "Optional[dtype[Any]]") + dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment] value = np.array(value, copy=False) @@ -1352,59 +1357,22 @@ def maybe_cast_to_datetime( _disallow_mismatched_datetimelike(value, dtype) try: - if is_datetime64: - dta = sequence_to_datetimes(value) - # GH 25843: Remove tz information since the dtype - # didn't specify one - - if dta.tz is not None: - raise ValueError( - "Cannot convert timezone-aware data to " - "timezone-naive dtype. Use " - "pd.Series(values).dt.tz_localize(None) instead." - ) - - # TODO(2.0): Do this astype in sequence_to_datetimes to - # avoid potential extra copy? - dta = dta.astype(dtype, copy=False) - value = dta - elif is_datetime64tz: - dtype = cast(DatetimeTZDtype, dtype) - # The string check can be removed once issue #13712 - # is solved. String data that is passed with a - # datetime64tz is assumed to be naive which should - # be localized to the timezone. - is_dt_string = is_string_dtype(value.dtype) - dta = sequence_to_datetimes(value) - if dta.tz is not None: - value = dta.astype(dtype, copy=False) - elif is_dt_string: - # Strings here are naive, so directly localize - # equiv: dta.astype(dtype) # though deprecated - - value = dta.tz_localize(dtype.tz) - else: - # Numeric values are UTC at this point, - # so localize and convert - # equiv: Series(dta).astype(dtype) # though deprecated - if getattr(vdtype, "kind", None) == "M": - # GH#24559, GH#33401 deprecate behavior inconsistent - # with DatetimeArray/DatetimeIndex - warnings.warn( - "In a future version, constructing a Series " - "from datetime64[ns] data and a " - "DatetimeTZDtype will interpret the data " - "as wall-times instead of " - "UTC times, matching the behavior of " - "DatetimeIndex. To treat the data as UTC " - "times, use pd.Series(data).dt" - ".tz_localize('UTC').tz_convert(dtype.tz) " - "or pd.Series(data.view('int64'), dtype=dtype)", - FutureWarning, - stacklevel=find_stack_level(), - ) - - value = dta.tz_localize("UTC").tz_convert(dtype.tz) + dta = sequence_to_datetimes(value) + # GH 25843: Remove tz information since the dtype + # didn't specify one + + if dta.tz is not None: + raise ValueError( + "Cannot convert timezone-aware data to " + "timezone-naive dtype. Use " + "pd.Series(values).dt.tz_localize(None) instead." + ) + + # TODO(2.0): Do this astype in sequence_to_datetimes to + # avoid potential extra copy? + dta = dta.astype(dtype, copy=False) + value = dta + except OutOfBoundsDatetime: raise except ParserError: diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index c1d0ab730fe7e..054663fcd0626 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -43,6 +43,7 @@ is_named_tuple, is_object_dtype, ) +from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, @@ -1054,7 +1055,15 @@ def _convert_object_array( def convert(arr): if dtype != np.dtype("O"): arr = lib.maybe_convert_objects(arr) - arr = maybe_cast_to_datetime(arr, dtype) + + if isinstance(dtype, ExtensionDtype): + # TODO: test(s) that get here + # TODO: try to de-duplicate this convert function with + # core.construction functions + cls = dtype.construct_array_type() + arr = cls._from_sequence(arr, dtype=dtype, copy=False) + else: + arr = maybe_cast_to_datetime(arr, dtype) return arr arrays = [convert(arr) for arr in content] diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 3faddfeca38bd..35ebd152f447c 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1241,14 +1241,14 @@ def test_construction_consistency(self): result = Series(ser.dt.tz_convert("UTC"), dtype=ser.dtype) tm.assert_series_equal(result, ser) - msg = "will interpret the data as wall-times" - with tm.assert_produces_warning(FutureWarning, match=msg): - # deprecate behavior inconsistent with DatetimeIndex GH#33401 - result = Series(ser.values, dtype=ser.dtype) - tm.assert_series_equal(result, ser) + # Pre-2.0 dt64 values were treated as utc, which was inconsistent + # with DatetimeIndex, which treats them as wall times, see GH#33401 + result = Series(ser.values, dtype=ser.dtype) + expected = Series(ser.values).dt.tz_localize(ser.dtype.tz) + tm.assert_series_equal(result, expected) with tm.assert_produces_warning(None): - # one suggested alternative to the deprecated usage + # one suggested alternative to the deprecated (changed in 2.0) usage middle = Series(ser.values).dt.tz_localize("UTC") result = middle.dt.tz_convert(ser.dtype.tz) tm.assert_series_equal(result, ser)