From b091f14111996145837478b630148fe4c8a7da12 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Nov 2022 14:39:33 -0700 Subject: [PATCH] DEPR: DatetimeIndex setops with mismatched tzs --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/indexes/api.py | 8 ------ pandas/core/indexes/base.py | 25 ++++++++----------- pandas/core/indexes/datetimes.py | 12 --------- .../tests/indexes/datetimes/test_timezones.py | 14 ++++++----- 5 files changed, 19 insertions(+), 41 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 6d1f2afab3c6d..c5fb8444680db 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -284,6 +284,7 @@ Removal of prior version deprecations/changes - Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`) - Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`) - Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`) +- Changed the behavior of :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` with mismatched timezones to convert to UTC instead of casting to object dtype (:issue:`39328`) - Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`) - Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`) - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 58dd207bb4353..c562eaffd241d 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -13,7 +13,6 @@ from pandas.errors import InvalidIndexError from pandas.core.dtypes.cast import find_common_type -from pandas.core.dtypes.common import is_dtype_equal from pandas.core.algorithms import safe_sort from pandas.core.indexes.base import ( @@ -276,7 +275,6 @@ def _find_common_index_dtype(inds): if kind == "special": result = indexes[0] - first = result dtis = [x for x in indexes if isinstance(x, DatetimeIndex)] dti_tzs = [x for x in dtis if x.tz is not None] @@ -289,12 +287,6 @@ def _find_common_index_dtype(inds): if len(dtis) == len(indexes): sort = True - if not all(is_dtype_equal(x.dtype, first.dtype) for x in indexes): - # i.e. timezones mismatch - # TODO(2.0): once deprecation is enforced, this union will - # cast to UTC automatically. - indexes = [x.tz_convert("UTC") for x in indexes] - result = indexes[0] elif len(dtis) > 1: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d8300bb29c274..6418498d5e3bb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3075,10 +3075,9 @@ def _validate_sort_keyword(self, sort): ) @final - def _deprecate_dti_setop(self, other: Index, setop: str_t) -> None: + def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]: """ - Deprecate setop behavior between timezone-aware DatetimeIndexes with - mismatched timezones. + With mismatched timezones, cast both to UTC. """ # Caller is responsibelf or checking # `not is_dtype_equal(self.dtype, other.dtype)` @@ -3089,14 +3088,10 @@ def _deprecate_dti_setop(self, other: Index, setop: str_t) -> None: and other.tz is not None ): # GH#39328, GH#45357 - warnings.warn( - f"In a future version, the {setop} of DatetimeIndex objects " - "with mismatched timezones will cast both to UTC instead of " - "object dtype. To retain the old behavior, " - f"use `index.astype(object).{setop}(other)`", - FutureWarning, - stacklevel=find_stack_level(), - ) + left = self.tz_convert("UTC") + right = other.tz_convert("UTC") + return left, right + return self, other @final def union(self, other, sort=None): @@ -3196,7 +3191,7 @@ def union(self, other, sort=None): "Can only union MultiIndex with MultiIndex or Index of tuples, " "try mi.to_flat_index().union(other) instead." ) - self._deprecate_dti_setop(other, "union") + self, other = self._dti_setop_align_tzs(other, "union") dtype = self._find_common_type_compat(other) left = self.astype(dtype, copy=False) @@ -3333,7 +3328,7 @@ def intersection(self, other, sort: bool = False): other, result_name = self._convert_can_do_setop(other) if not is_dtype_equal(self.dtype, other.dtype): - self._deprecate_dti_setop(other, "intersection") + self, other = self._dti_setop_align_tzs(other, "intersection") if self.equals(other): if self.has_duplicates: @@ -3481,7 +3476,7 @@ def difference(self, other, sort=None): self._assert_can_do_setop(other) other, result_name = self._convert_can_do_setop(other) - # Note: we do NOT call _deprecate_dti_setop here, as there + # Note: we do NOT call _dti_setop_align_tzs here, as there # is no requirement that .difference be commutative, so it does # not cast to object. @@ -3565,7 +3560,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): result_name = result_name_update if not is_dtype_equal(self.dtype, other.dtype): - self._deprecate_dti_setop(other, "symmetric_difference") + self, other = self._dti_setop_align_tzs(other, "symmetric_difference") if not self._should_compare(other): return self.union(other, sort=sort).rename(result_name) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 667deec23757f..73e25f9fe2f06 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -429,18 +429,6 @@ def _can_range_setop(self, other) -> bool: return False return super()._can_range_setop(other) - def _maybe_utc_convert(self, other: Index) -> tuple[DatetimeIndex, Index]: - this = self - - if isinstance(other, DatetimeIndex): - if (self.tz is None) ^ (other.tz is None): - raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") - - if not timezones.tz_compare(self.tz, other.tz): - this = self.tz_convert("UTC") - other = other.tz_convert("UTC") - return this, other - # -------------------------------------------------------------------- def _get_time_micros(self) -> npt.NDArray[np.int64]: diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index a07f21f785828..0bc2862e55021 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1155,19 +1155,21 @@ def test_dti_convert_tz_aware_datetime_datetime(self, tz): @pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"]) def test_dti_setop_aware(self, setop): # non-overlapping + # GH#39328 as of 2.0 we cast these to UTC instead of object rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", tz="US/Central") rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", tz="US/Eastern") - with tm.assert_produces_warning(FutureWarning): - # # GH#39328 will cast both to UTC - result = getattr(rng, setop)(rng2) + result = getattr(rng, setop)(rng2) - expected = getattr(rng.astype("O"), setop)(rng2.astype("O")) + left = rng.tz_convert("UTC") + right = rng2.tz_convert("UTC") + expected = getattr(left, setop)(right) tm.assert_index_equal(result, expected) + assert result.tz == left.tz if len(result): - assert result[0].tz.zone == "US/Central" - assert result[-1].tz.zone == "US/Eastern" + assert result[0].tz.zone == "UTC" + assert result[-1].tz.zone == "UTC" def test_dti_union_mixed(self): # GH 21671