Skip to content

Commit 57309e6

Browse files
lukemanley, topper-123
authored and committed
PERF: Indexing with pyarrow timestamp & duration dtypes (pandas-dev#53368)
* PERF: Indexing with pyarrow timestamp & duration dtypes
* whatsnew
1 parent f567421 commit 57309e6

File tree

2 files changed

+37
-2
lines changed

2 files changed

+37
-2
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ Performance improvements
303303
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.astype` when converting from a pyarrow timestamp or duration dtype to numpy (:issue:`53326`)
304304
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`52525`)
305305
- Performance improvement when doing various reshaping operations on :class:`arrays.IntegerArray` & :class:`arrays.FloatingArray` by avoiding doing unnecessary validation (:issue:`53013`)
306+
- Performance improvement when indexing with pyarrow timestamp and duration dtypes (:issue:`53368`)
306307

307308
.. ---------------------------------------------------------------------------
308309
.. _whatsnew_210.bug_fixes:

pandas/core/indexes/base.py

+36-2
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@
151151
Categorical,
152152
ExtensionArray,
153153
)
154+
from pandas.core.arrays.datetimes import tz_to_dtype
154155
from pandas.core.arrays.string_ import StringArray
155156
from pandas.core.base import (
156157
IndexOpsMixin,
@@ -191,8 +192,11 @@
191192
MultiIndex,
192193
Series,
193194
)
194-
from pandas.core.arrays import PeriodArray
195-
195+
from pandas.core.arrays import (
196+
DatetimeArray,
197+
PeriodArray,
198+
TimedeltaArray,
199+
)
196200

197201
__all__ = ["Index"]
198202

@@ -826,6 +830,22 @@ def _engine(
826830
) -> libindex.IndexEngine | libindex.ExtensionEngine | libindex.MaskedIndexEngine:
827831
# For base class (object dtype) we get ObjectEngine
828832
target_values = self._get_engine_target()
833+
834+
if isinstance(self._values, ArrowExtensionArray) and self.dtype.kind in "Mm":
835+
import pyarrow as pa
836+
837+
pa_type = self._values._pa_array.type
838+
if pa.types.is_timestamp(pa_type):
839+
dtype = tz_to_dtype(pa_type.tz, pa_type.unit)
840+
target_values = self._values.astype(dtype)
841+
target_values = cast("DatetimeArray", target_values)
842+
return libindex.DatetimeEngine(target_values._ndarray)
843+
elif pa.types.is_duration(pa_type):
844+
dtype = np.dtype(f"m8[{pa_type.unit}]")
845+
target_values = self._values.astype(dtype)
846+
target_values = cast("TimedeltaArray", target_values)
847+
return libindex.TimedeltaEngine(target_values._ndarray)
848+
829849
if isinstance(target_values, ExtensionArray):
830850
if isinstance(target_values, (BaseMaskedArray, ArrowExtensionArray)):
831851
try:
@@ -5044,6 +5064,20 @@ def _get_engine_target(self) -> ArrayLike:
50445064
if isinstance(vals, StringArray):
50455065
# GH#45652 much more performant than ExtensionEngine
50465066
return vals._ndarray
5067+
if isinstance(vals, ArrowExtensionArray) and self.dtype.kind in "Mm":
5068+
import pyarrow as pa
5069+
5070+
pa_type = vals._pa_array.type
5071+
if pa.types.is_timestamp(pa_type):
5072+
dtype = tz_to_dtype(pa_type.tz, pa_type.unit)
5073+
vals = vals.astype(dtype)
5074+
vals = cast("DatetimeArray", vals)
5075+
return vals._ndarray.view("i8")
5076+
elif pa.types.is_duration(pa_type):
5077+
dtype = np.dtype(f"m8[{pa_type.unit}]")
5078+
vals = vals.astype(dtype)
5079+
vals = cast("TimedeltaArray", vals)
5080+
return vals._ndarray.view("i8")
50475081
if (
50485082
type(self) is Index
50495083
and isinstance(self._values, ExtensionArray)

0 commit comments

Comments
 (0)