From 643999e9d1f4873d5749b82268709484629a974e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 2 Nov 2022 19:59:26 -0700 Subject: [PATCH 1/2] DEPR: DatetimeIndex indexing with mismatched tzawareness --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/indexes/datetimes.py | 41 +++++++--------- .../tests/frame/methods/test_reset_index.py | 8 +--- .../tests/indexes/datetimes/test_indexing.py | 32 ++++++++----- pandas/tests/indexing/test_datetime.py | 18 +++---- pandas/tests/series/indexing/test_datetime.py | 48 +++++++++---------- pandas/tests/series/methods/test_truncate.py | 6 ++- 7 files changed, 75 insertions(+), 79 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index d71160cdbc369..95be1c0008ddf 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -302,6 +302,7 @@ Removal of prior version deprecations/changes - Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`) - Changed the behavior of :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` with mismatched timezones to convert to UTC instead of casting to object dtype (:issue:`39328`) - Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`) +- Changed the behavior of indexing on a timezone-aware :class:`DatetimeIndex` with a timezone-naive ``datetime`` object or vice-versa; these now behave like any other non-comparable type (:issue:`36148`) - Changed behavior of :meth:`SparseArray.astype` when given a dtype that is not explicitly ``SparseDtype``, cast to the exact requested dtype rather than silently using a ``SparseDtype`` instead (:issue:`34457`) - Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`) - Changed behavior of :class:`Index` constructor when given a ``np.ndarray`` with object-dtype containing numeric entries; this now retains object dtype rather than inferring a numeric dtype, consistent with :class:`Series` behavior (:issue:`42870`) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 73e25f9fe2f06..f93490174c0dc 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -552,31 +552,24 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): end = self._maybe_cast_for_get_loc(end) return start, end + def _disallow_mismatched_indexing(self, key, one_way: bool = False) -> None: + """ + Check for mismatched-tzawareness indexing and re-raise as KeyError. + """ + try: + self._deprecate_mismatched_indexing(key, one_way=one_way) + except TypeError as err: + raise KeyError(key) from err + def _deprecate_mismatched_indexing(self, key, one_way: bool = False) -> None: # GH#36148 # we get here with isinstance(key, self._data._recognized_scalars) - try: - self._data._assert_tzawareness_compat(key) - except TypeError: - if self.tz is None: - msg = ( - "Indexing a timezone-naive DatetimeIndex with a " - "timezone-aware datetime is deprecated and will " - "raise KeyError in a future version. " - "Use a timezone-naive object instead." - ) - elif one_way: - # we special-case timezone-naive strings and timezone-aware - # DatetimeIndex - return - else: - msg = ( - "Indexing a timezone-aware DatetimeIndex with a " - "timezone-naive datetime is deprecated and will " - "raise KeyError in a future version. " - "Use a timezone-aware object instead." - ) - warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + if self.tz is not None and one_way: + # we special-case timezone-naive strings and timezone-aware + # DatetimeIndex + return + + self._data._assert_tzawareness_compat(key) def get_loc(self, key, method=None, tolerance=None): """ @@ -594,7 +587,7 @@ def get_loc(self, key, method=None, tolerance=None): if isinstance(key, self._data._recognized_scalars): # needed to localize naive datetimes - self._deprecate_mismatched_indexing(key) + self._disallow_mismatched_indexing(key) key = self._maybe_cast_for_get_loc(key) elif isinstance(key, str): @@ -603,7 +596,7 @@ def get_loc(self, key, method=None, tolerance=None): parsed, reso = self._parse_with_reso(key) except ValueError as err: raise KeyError(key) from err - self._deprecate_mismatched_indexing(parsed, one_way=True) + self._disallow_mismatched_indexing(parsed, one_way=True) if self._can_partial_date_slice(reso): try: diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 30c033572335a..9ce6d405a9c37 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -350,11 +350,6 @@ def test_reset_index_multiindex_nan(self): ) def test_reset_index_with_datetimeindex_cols(self, name): # GH#5818 - warn = None - if isinstance(name, Timestamp) and name.tz is not None: - # _deprecate_mismatched_indexing - warn = FutureWarning - df = DataFrame( [[1, 2], [3, 4]], columns=date_range("1/1/2013", "1/2/2013"), @@ -362,8 +357,7 @@ def test_reset_index_with_datetimeindex_cols(self, name): ) df.index.name = name - with tm.assert_produces_warning(warn): - result = df.reset_index() + result = df.reset_index() item = name if name is not None else "index" columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)]) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 7e4df5ae8699c..7fee320c111c6 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -719,11 +719,13 @@ def test_get_slice_bounds_datetime_within( index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz) key = box(year=2000, month=1, day=7) - warn = None if tz is None else FutureWarning - with tm.assert_produces_warning(warn): - # GH#36148 will require tzawareness-compat + if tz is not None: + with pytest.raises(TypeError, match="Cannot compare tz-naive"): + # GH#36148 we require tzawareness-compat as of 2.0 + index.get_slice_bound(key, side=side) + else: result = index.get_slice_bound(key, side=side) - assert result == expected + assert result == expected @pytest.mark.parametrize("box", [datetime, Timestamp]) @pytest.mark.parametrize("side", ["left", "right"]) @@ -736,11 +738,13 @@ def test_get_slice_bounds_datetime_outside( index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz) key = box(year=year, month=1, day=7) - warn = None if tz is None else FutureWarning - with tm.assert_produces_warning(warn): - # GH#36148 will require tzawareness-compat + if tz is not None: + with pytest.raises(TypeError, match="Cannot compare tz-naive"): + # GH#36148 we require tzawareness-compat as of 2.0 + index.get_slice_bound(key, side=side) + else: result = index.get_slice_bound(key, side=side) - assert result == expected + assert result == expected @pytest.mark.parametrize("box", [datetime, Timestamp]) def test_slice_datetime_locs(self, box, tz_aware_fixture): @@ -749,12 +753,14 @@ def test_slice_datetime_locs(self, box, tz_aware_fixture): index = DatetimeIndex(["2010-01-01", "2010-01-03"]).tz_localize(tz) key = box(2010, 1, 1) - warn = None if tz is None else FutureWarning - with tm.assert_produces_warning(warn): - # GH#36148 will require tzawareness-compat + if tz is not None: + with pytest.raises(TypeError, match="Cannot compare tz-naive"): + # GH#36148 we require tzawareness-compat as of 2.0 + index.slice_locs(key, box(2010, 1, 2)) + else: result = index.slice_locs(key, box(2010, 1, 2)) - expected = (0, 1) - assert result == expected + expected = (0, 1) + assert result == expected class TestIndexerBetweenTime: diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 8d498b59c55d1..dc2fe85679181 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -1,3 +1,7 @@ +import re + +import pytest + import pandas as pd from pandas import ( DataFrame, @@ -17,14 +21,12 @@ def test_get_loc_naive_dti_aware_str_deprecated(self): ser = Series(range(100), index=dti) key = "2013-01-01 00:00:00.000000050+0000" - msg = "Indexing a timezone-naive DatetimeIndex with a timezone-aware datetime" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = ser[key] - assert res == 0 - - with tm.assert_produces_warning(FutureWarning, match=msg): - loc = dti.get_loc(key) - assert loc == 0 + msg = re.escape(repr(key)) + with pytest.raises(KeyError, match=msg): + ser[key] + + with pytest.raises(KeyError, match=msg): + dti.get_loc(key) def test_indexing_with_datetime_tz(self): diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index b8291471225d7..dc1693557ccf8 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -140,37 +140,35 @@ def test_getitem_setitem_datetimeindex(): msg = "Cannot compare tz-naive and tz-aware datetime-like objects" naive = datetime(1990, 1, 1, 4) for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]: - with tm.assert_produces_warning(FutureWarning): - # GH#36148 will require tzawareness compat - result = ts[key] - expected = ts[4] - assert result == expected + with pytest.raises(KeyError, match=re.escape(repr(key))): + # GH#36148 as of 2.0 we require tzawareness-compat + ts[key] result = ts.copy() - with tm.assert_produces_warning(FutureWarning): - # GH#36148 will require tzawareness compat - result[datetime(1990, 1, 1, 4)] = 0 - with tm.assert_produces_warning(FutureWarning): - # GH#36148 will require tzawareness compat - result[datetime(1990, 1, 1, 4)] = ts[4] - tm.assert_series_equal(result, ts) + # GH#36148 as of 2.0 we do not ignore tzawareness mismatch in indexing, + # so setting it as a new key casts to object rather than matching + # rng[4] + result[naive] = ts[4] + assert result.index.dtype == object + tm.assert_index_equal(result.index[:-1], rng.astype(object)) + assert result.index[-1] == naive - with tm.assert_produces_warning(FutureWarning): - # GH#36148 will require tzawareness compat - result = ts[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] - expected = ts[4:8] - tm.assert_series_equal(result, expected) + msg = "Cannot compare tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + # GH#36148 require tzawareness compat as of 2.0 + ts[naive : datetime(1990, 1, 1, 7)] result = ts.copy() - with tm.assert_produces_warning(FutureWarning): - # GH#36148 will require tzawareness compat - result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = 0 - with tm.assert_produces_warning(FutureWarning): - # GH#36148 will require tzawareness compat - result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = ts[4:8] + with pytest.raises(TypeError, match=msg): + # GH#36148 require tzawareness compat as of 2.0 + result[naive : datetime(1990, 1, 1, 7)] = 0 + with pytest.raises(TypeError, match=msg): + # GH#36148 require tzawareness compat as of 2.0 + result[naive : datetime(1990, 1, 1, 7)] = 99 + # the __setitems__ here failed, so result should still match ts tm.assert_series_equal(result, ts) - lb = datetime(1990, 1, 1, 4) + lb = naive rb = datetime(1990, 1, 1, 7) msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime" with pytest.raises(TypeError, match=msg): @@ -178,7 +176,7 @@ def test_getitem_setitem_datetimeindex(): # see GH#18376, GH#18162 ts[(ts.index >= lb) & (ts.index <= rb)] - lb = Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo) + lb = Timestamp(naive).tz_localize(rng.tzinfo) rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo) result = ts[(ts.index >= lb) & (ts.index <= rb)] expected = ts[4:8] diff --git a/pandas/tests/series/methods/test_truncate.py b/pandas/tests/series/methods/test_truncate.py index a3a27a744b180..33eb5c10ae163 100644 --- a/pandas/tests/series/methods/test_truncate.py +++ b/pandas/tests/series/methods/test_truncate.py @@ -1,5 +1,7 @@ from datetime import datetime +import pytest + import pandas as pd from pandas import ( Series, @@ -13,8 +15,8 @@ def test_truncate_datetimeindex_tz(self): # GH 9243 idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific") s = Series(range(len(idx)), index=idx) - with tm.assert_produces_warning(FutureWarning): - # GH#36148 in the future will require tzawareness compat + with pytest.raises(TypeError, match="Cannot compare tz-naive"): + # GH#36148 as of 2.0 we require tzawareness compat s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) lb = idx[1] From 18f43252a798d1bc3769ff4ea6c5edbfeee73169 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 4 Nov 2022 11:20:59 -0700 Subject: [PATCH 2/2] clarify whatsnew --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 6151cc08d9704..4c7c8ce6391f4 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -399,7 +399,7 @@ Removal of prior version deprecations/changes - Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`) - Changed the behavior of :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` with mismatched timezones to convert to UTC instead of casting to object dtype (:issue:`39328`) - Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`) -- Changed the behavior of indexing on a timezone-aware :class:`DatetimeIndex` with a timezone-naive ``datetime`` object or vice-versa; these now behave like any other non-comparable type (:issue:`36148`) +- Changed the behavior of indexing on a timezone-aware :class:`DatetimeIndex` with a timezone-naive ``datetime`` object or vice-versa; these now behave like any other non-comparable type by raising ``KeyError`` (:issue:`36148`) - Changed behavior of :meth:`SparseArray.astype` when given a dtype that is not explicitly ``SparseDtype``, cast to the exact requested dtype rather than silently using a ``SparseDtype`` instead (:issue:`34457`) - Changed behavior of :meth:`Index.ravel` to return a view on the original :class:`Index` instead of a ``np.ndarray`` (:issue:`36900`) - Changed behavior of :meth:`Index.to_frame` with explicit ``name=None`` to use ``None`` for the column name instead of the index's name or default ``0`` (:issue:`45523`)