From 2320b5612c4adf44df95344185244f9fb1267c07 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 20 Jun 2023 15:41:07 -0700 Subject: [PATCH 1/2] Backport PR #53652: BUG: Indexing a timestamp ArrowDtype Index --- doc/source/whatsnew/v2.0.3.rst | 1 + pandas/core/indexes/base.py | 2 +- pandas/tests/indexing/test_datetime.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index 0f56f203a5802..160223168d83c 100644 --- a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -26,6 +26,7 @@ Bug fixes - Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`) - Bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` with ``expand=True`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`53532`) - Bug in indexing methods (e.g. :meth:`DataFrame.__getitem__`) where taking the entire :class:`DataFrame`/:class:`Series` would raise an ``OverflowError`` when Copy on Write was enabled and the length of the array was over the maximum size a 32-bit integer can hold (:issue:`53616`) +- Bug when indexing a :class:`DataFrame` or :class:`Series` with an :class:`Index` with a timestamp :class:`ArrowDtype` would raise an ``AttributeError`` (:issue:`53644`) .. --------------------------------------------------------------------------- .. _whatsnew_203.other: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7f03819af6723..9395b1d3163f0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5880,7 +5880,7 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray] if isinstance(key, Index): # GH 42790 - Preserve name from an Index keyarr.name = key.name - if keyarr.dtype.kind in ["m", "M"]: + if keyarr.dtype.kind in ["m", "M"] or isinstance(keyarr.dtype, DatetimeTZDtype): # DTI/TDI.take can infer a freq in some cases when we dont want one if isinstance(key, list) or ( isinstance(key, type(self)) diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 15e1fae77d65b..6510612ba6f87 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -168,3 +168,21 @@ def test_getitem_str_slice_millisecond_resolution(self, frame_or_series): ], ) tm.assert_equal(result, expected) + + def test_getitem_pyarrow_index(self, frame_or_series): + # GH 53644 + pytest.importorskip("pyarrow") + obj = frame_or_series( + range(5), + index=date_range("2020", freq="D", periods=5).astype( + "timestamp[us][pyarrow]" + ), + ) + result = obj.loc[obj.index[:-3]] + expected = frame_or_series( + range(2), + index=date_range("2020", freq="D", periods=2).astype( + "timestamp[us][pyarrow]" + ), + ) + tm.assert_equal(result, expected) From d4c8734e83f55f65e06cab2960225ac0e9787a99 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 20 Jun 2023 17:19:56 -0700 Subject: [PATCH 2/2] Check for np.dtype only --- pandas/core/indexes/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9395b1d3163f0..0e2ef1d63655d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5880,7 +5880,9 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray] if isinstance(key, Index): # GH 42790 - Preserve name from an Index keyarr.name = key.name - if keyarr.dtype.kind in ["m", "M"] or isinstance(keyarr.dtype, DatetimeTZDtype): + if ( + isinstance(keyarr.dtype, np.dtype) and keyarr.dtype.kind in ["m", "M"] + ) or isinstance(keyarr.dtype, DatetimeTZDtype): # DTI/TDI.take can infer a freq in some cases when we dont want one if isinstance(key, list) or ( isinstance(key, type(self))