Skip to content

DEPR: DatetimeIndex setops with mismatched tzs #49455

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ Removal of prior version deprecations/changes
- Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`)
- Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
- Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`)
- Changed the behavior of :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` with mismatched timezones to convert to UTC instead of casting to object dtype (:issue:`39328`)
- Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`)
- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
Expand Down
8 changes: 0 additions & 8 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from pandas.errors import InvalidIndexError

from pandas.core.dtypes.cast import find_common_type
from pandas.core.dtypes.common import is_dtype_equal

from pandas.core.algorithms import safe_sort
from pandas.core.indexes.base import (
Expand Down Expand Up @@ -276,7 +275,6 @@ def _find_common_index_dtype(inds):

if kind == "special":
result = indexes[0]
first = result

dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
dti_tzs = [x for x in dtis if x.tz is not None]
Expand All @@ -289,12 +287,6 @@ def _find_common_index_dtype(inds):

if len(dtis) == len(indexes):
sort = True
if not all(is_dtype_equal(x.dtype, first.dtype) for x in indexes):
# i.e. timezones mismatch
# TODO(2.0): once deprecation is enforced, this union will
# cast to UTC automatically.
indexes = [x.tz_convert("UTC") for x in indexes]

result = indexes[0]

elif len(dtis) > 1:
Expand Down
25 changes: 10 additions & 15 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3075,10 +3075,9 @@ def _validate_sort_keyword(self, sort):
)

@final
def _deprecate_dti_setop(self, other: Index, setop: str_t) -> None:
def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]:
"""
Deprecate setop behavior between timezone-aware DatetimeIndexes with
mismatched timezones.
With mismatched timezones, cast both to UTC.
"""
# Caller is responsible for checking
# `not is_dtype_equal(self.dtype, other.dtype)`
Expand All @@ -3089,14 +3088,10 @@ def _deprecate_dti_setop(self, other: Index, setop: str_t) -> None:
and other.tz is not None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could also skip the tz_convert if both .tzs are "UTC"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we only get here with non-matching dtypes. i'll follow up to make that clearer

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah gotcha. That'd be helpful, thanks

):
# GH#39328, GH#45357
warnings.warn(
f"In a future version, the {setop} of DatetimeIndex objects "
"with mismatched timezones will cast both to UTC instead of "
"object dtype. To retain the old behavior, "
f"use `index.astype(object).{setop}(other)`",
FutureWarning,
stacklevel=find_stack_level(),
)
left = self.tz_convert("UTC")
right = other.tz_convert("UTC")
return left, right
return self, other

@final
def union(self, other, sort=None):
Expand Down Expand Up @@ -3196,7 +3191,7 @@ def union(self, other, sort=None):
"Can only union MultiIndex with MultiIndex or Index of tuples, "
"try mi.to_flat_index().union(other) instead."
)
self._deprecate_dti_setop(other, "union")
self, other = self._dti_setop_align_tzs(other, "union")

dtype = self._find_common_type_compat(other)
left = self.astype(dtype, copy=False)
Expand Down Expand Up @@ -3333,7 +3328,7 @@ def intersection(self, other, sort: bool = False):
other, result_name = self._convert_can_do_setop(other)

if not is_dtype_equal(self.dtype, other.dtype):
self._deprecate_dti_setop(other, "intersection")
self, other = self._dti_setop_align_tzs(other, "intersection")

if self.equals(other):
if self.has_duplicates:
Expand Down Expand Up @@ -3481,7 +3476,7 @@ def difference(self, other, sort=None):
self._assert_can_do_setop(other)
other, result_name = self._convert_can_do_setop(other)

# Note: we do NOT call _deprecate_dti_setop here, as there
# Note: we do NOT call _dti_setop_align_tzs here, as there
# is no requirement that .difference be commutative, so it does
# not cast to object.

Expand Down Expand Up @@ -3565,7 +3560,7 @@ def symmetric_difference(self, other, result_name=None, sort=None):
result_name = result_name_update

if not is_dtype_equal(self.dtype, other.dtype):
self._deprecate_dti_setop(other, "symmetric_difference")
self, other = self._dti_setop_align_tzs(other, "symmetric_difference")

if not self._should_compare(other):
return self.union(other, sort=sort).rename(result_name)
Expand Down
12 changes: 0 additions & 12 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,18 +429,6 @@ def _can_range_setop(self, other) -> bool:
return False
return super()._can_range_setop(other)

def _maybe_utc_convert(self, other: Index) -> tuple[DatetimeIndex, Index]:
this = self

if isinstance(other, DatetimeIndex):
if (self.tz is None) ^ (other.tz is None):
raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

if not timezones.tz_compare(self.tz, other.tz):
this = self.tz_convert("UTC")
other = other.tz_convert("UTC")
return this, other

# --------------------------------------------------------------------

def _get_time_micros(self) -> npt.NDArray[np.int64]:
Expand Down
14 changes: 8 additions & 6 deletions pandas/tests/indexes/datetimes/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -1155,19 +1155,21 @@ def test_dti_convert_tz_aware_datetime_datetime(self, tz):
@pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"])
def test_dti_setop_aware(self, setop):
# non-overlapping
# GH#39328 as of 2.0 we cast these to UTC instead of object
rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", tz="US/Central")

rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", tz="US/Eastern")

with tm.assert_produces_warning(FutureWarning):
# GH#39328 will cast both to UTC
result = getattr(rng, setop)(rng2)
result = getattr(rng, setop)(rng2)

expected = getattr(rng.astype("O"), setop)(rng2.astype("O"))
left = rng.tz_convert("UTC")
right = rng2.tz_convert("UTC")
expected = getattr(left, setop)(right)
tm.assert_index_equal(result, expected)
assert result.tz == left.tz
if len(result):
assert result[0].tz.zone == "US/Central"
assert result[-1].tz.zone == "US/Eastern"
assert result[0].tz.zone == "UTC"
assert result[-1].tz.zone == "UTC"

def test_dti_union_mixed(self):
# GH 21671
Expand Down