Skip to content

DEPR: DatetimeIndex.intersection with mixed timezones cast to UTC, not object #45357

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,11 @@ Slicing on a :class:`DataFrame` will not be affected.
Other Deprecations
^^^^^^^^^^^^^^^^^^
- Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`)
- Deprecated behavior of :meth:`DatetimeIndex.intersection` and :meth:`DatetimeIndex.symmetric_difference` (``union`` behavior was already deprecated in version 1.3.0) with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`, :issue:`45357`)
- Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`)
-


.. ---------------------------------------------------------------------------
.. _whatsnew_150.performance:

Expand Down
50 changes: 35 additions & 15 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3040,6 +3040,30 @@ def _validate_sort_keyword(self, sort):
f"None or False; {sort} was passed."
)

@final
def _deprecate_dti_setop(self, other: Index, setop: str_t):
    """
    Warn that a set operation on mismatched-timezone DatetimeIndexes will change.

    A ``FutureWarning`` is issued only when ``self`` and ``other`` are both
    DatetimeIndex instances and both are timezone-aware; in every other case
    this method does nothing.

    Parameters
    ----------
    other : Index
        The right-hand operand of the set operation.
    setop : str
        Name of the set operation (e.g. "union"); it is interpolated into
        the warning message.

    Notes
    -----
    This method does not compare dtypes itself. The caller is responsible
    for checking ``not is_dtype_equal(self.dtype, other.dtype)`` first.
    """
    both_dti = isinstance(self, ABCDatetimeIndex) and isinstance(
        other, ABCDatetimeIndex
    )
    if not both_dti:
        return

    # A timezone-naive operand on either side is not covered by this
    # deprecation.
    if self.tz is None or other.tz is None:
        return

    # GH#39328, GH#45357
    msg = (
        f"In a future version, the {setop} of DatetimeIndex objects "
        "with mismatched timezones will cast both to UTC instead of "
        "object dtype. To retain the old behavior, "
        f"use `index.astype(object).{setop}(other)`"
    )
    warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())

@final
def union(self, other, sort=None):
"""
Expand Down Expand Up @@ -3138,21 +3162,7 @@ def union(self, other, sort=None):
"Can only union MultiIndex with MultiIndex or Index of tuples, "
"try mi.to_flat_index().union(other) instead."
)
if (
isinstance(self, ABCDatetimeIndex)
and isinstance(other, ABCDatetimeIndex)
and self.tz is not None
and other.tz is not None
):
# GH#39328
warnings.warn(
"In a future version, the union of DatetimeIndex objects "
"with mismatched timezones will cast both to UTC instead of "
"object dtype. To retain the old behavior, "
"use `index.astype(object).union(other)`",
FutureWarning,
stacklevel=find_stack_level(),
)
self._deprecate_dti_setop(other, "union")

dtype = self._find_common_type_compat(other)
left = self.astype(dtype, copy=False)
Expand Down Expand Up @@ -3288,6 +3298,9 @@ def intersection(self, other, sort=False):
self._assert_can_do_setop(other)
other, result_name = self._convert_can_do_setop(other)

if not is_dtype_equal(self.dtype, other.dtype):
self._deprecate_dti_setop(other, "intersection")

if self.equals(other):
if self.has_duplicates:
return self.unique()._get_reconciled_name_object(other)
Expand Down Expand Up @@ -3418,6 +3431,10 @@ def difference(self, other, sort=None):
self._assert_can_do_setop(other)
other, result_name = self._convert_can_do_setop(other)

# Note: we do NOT call _deprecate_dti_setop here. There is no
# requirement that .difference be commutative, so it does not
# cast to object.

if self.equals(other):
# Note: we do not (yet) sort even if sort=None GH#24959
return self[:0].rename(result_name)
Expand Down Expand Up @@ -3492,6 +3509,9 @@ def symmetric_difference(self, other, result_name=None, sort=None):
if result_name is None:
result_name = result_name_update

if not is_dtype_equal(self.dtype, other.dtype):
self._deprecate_dti_setop(other, "symmetric_difference")

if not self._should_compare(other):
return self.union(other, sort=sort).rename(result_name)

Expand Down
13 changes: 8 additions & 5 deletions pandas/tests/indexes/datetimes/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,20 +1140,23 @@ def test_dti_convert_tz_aware_datetime_datetime(self, tz):
tm.assert_numpy_array_equal(converted.asi8, ex_vals)
assert converted.tz is pytz.utc

def test_dti_union_aware(self):
# Note: not difference, as there is no symmetry requirement there
@pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"])
def test_dti_setop_aware(self, setop):
# non-overlapping
rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", tz="US/Central")

rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", tz="US/Eastern")

with tm.assert_produces_warning(FutureWarning):
# GH#39328 will cast both to UTC
result = rng.union(rng2)
result = getattr(rng, setop)(rng2)

expected = rng.astype("O").union(rng2.astype("O"))
expected = getattr(rng.astype("O"), setop)(rng2.astype("O"))
tm.assert_index_equal(result, expected)
assert result[0].tz.zone == "US/Central"
assert result[-1].tz.zone == "US/Eastern"
if len(result):
assert result[0].tz.zone == "US/Central"
assert result[-1].tz.zone == "US/Eastern"

def test_dti_union_mixed(self):
# GH 21671
Expand Down