Skip to content

Commit 6006c05

Browse files
jbrockmendel authored and topper-123 committed
PERF: PeriodDtype hash and eq (pandas-dev#52336)
* PERF: PeriodDtype __hash__/__eq__
* de-duplicate fastpath
* Revert accidental
* mypy fixup
* troubleshoot min-versions build
1 parent 88b78f6 commit 6006c05

File tree

6 files changed

+20
-33
lines changed

6 files changed

+20
-33
lines changed

pandas/_libs/tslibs/dtypes.pxd

+1
Original file line number | Diff line number | Diff line change
@@ -102,5 +102,6 @@ cdef enum PeriodDtypeCode:
102102
cdef class PeriodDtypeBase:
103103
cdef readonly:
104104
PeriodDtypeCode _dtype_code
105+
int64_t _n
105106

106107
cpdef int _get_to_timestamp_base(self)

pandas/_libs/tslibs/dtypes.pyi

+3-1
Original file line number | Diff line number | Diff line change
@@ -14,16 +14,18 @@ def abbrev_to_npy_unit(abbrev: str) -> int: ...
1414

1515
class PeriodDtypeBase:
1616
_dtype_code: int # PeriodDtypeCode
17+
_n: int
1718

1819
# actually __cinit__
19-
def __new__(cls, code: int): ...
20+
def __new__(cls, code: int, n: int): ...
2021
@property
2122
def _freq_group_code(self) -> int: ...
2223
@property
2324
def _resolution_obj(self) -> Resolution: ...
2425
def _get_to_timestamp_base(self) -> int: ...
2526
@property
2627
def _freqstr(self) -> str: ...
28+
def __hash__(self) -> int: ...
2729

2830
class FreqGroup(Enum):
2931
FR_ANN: int

pandas/_libs/tslibs/dtypes.pyx

+7-2
Original file line number | Diff line number | Diff line change
@@ -18,17 +18,22 @@ cdef class PeriodDtypeBase:
1818
"""
1919
# cdef readonly:
2020
# PeriodDtypeCode _dtype_code
21+
# int64_t _n
2122

22-
def __cinit__(self, PeriodDtypeCode code):
23+
def __cinit__(self, PeriodDtypeCode code, int64_t n):
2324
self._dtype_code = code
25+
self._n = n
2426

2527
def __eq__(self, other):
2628
if not isinstance(other, PeriodDtypeBase):
2729
return False
2830
if not isinstance(self, PeriodDtypeBase):
2931
# cython semantics, this is a reversed op
3032
return False
31-
return self._dtype_code == other._dtype_code
33+
return self._dtype_code == other._dtype_code and self._n == other._n
34+
35+
def __hash__(self) -> int:
36+
return hash((self._n, self._dtype_code))
3237

3338
@property
3439
def _freq_group_code(self) -> int:

pandas/_libs/tslibs/period.pyx

+2-2
Original file line number | Diff line number | Diff line change
@@ -1671,7 +1671,7 @@ cdef class _Period(PeriodMixin):
16711671
# Note: this is more performant than PeriodDtype.from_date_offset(freq)
16721672
# because from_date_offset cannot be made a cdef method (until cython
16731673
# supported cdef classmethods)
1674-
self._dtype = PeriodDtypeBase(freq._period_dtype_code)
1674+
self._dtype = PeriodDtypeBase(freq._period_dtype_code, freq.n)
16751675

16761676
@classmethod
16771677
def _maybe_convert_freq(cls, object freq) -> BaseOffset:
@@ -1686,7 +1686,7 @@ cdef class _Period(PeriodMixin):
16861686
"""
16871687
if isinstance(freq, int):
16881688
# We already have a dtype code
1689-
dtype = PeriodDtypeBase(freq)
1689+
dtype = PeriodDtypeBase(freq, 1)
16901690
freq = dtype._freqstr
16911691

16921692
freq = to_offset(freq)

pandas/core/dtypes/dtypes.py

+3-13
Original file line number | Diff line number | Diff line change
@@ -862,6 +862,7 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype):
862862
_metadata = ("freq",)
863863
_match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
864864
_cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
865+
__hash__ = PeriodDtypeBase.__hash__
865866

866867
def __new__(cls, freq):
867868
"""
@@ -879,7 +880,7 @@ def __new__(cls, freq):
879880
return cls._cache_dtypes[freq.freqstr]
880881
except KeyError:
881882
dtype_code = freq._period_dtype_code
882-
u = PeriodDtypeBase.__new__(cls, dtype_code)
883+
u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n)
883884
u._freq = freq
884885
cls._cache_dtypes[freq.freqstr] = u
885886
return u
@@ -945,22 +946,11 @@ def name(self) -> str_type:
945946
def na_value(self) -> NaTType:
946947
return NaT
947948

948-
def __hash__(self) -> int:
949-
# make myself hashable
950-
return hash(str(self))
951-
952949
def __eq__(self, other: Any) -> bool:
953950
if isinstance(other, str):
954951
return other in [self.name, self.name.title()]
955952

956-
elif isinstance(other, PeriodDtype):
957-
# For freqs that can be held by a PeriodDtype, this check is
958-
# equivalent to (and much faster than) self.freq == other.freq
959-
sfreq = self._freq
960-
ofreq = other._freq
961-
return sfreq.n == ofreq.n and self._dtype_code == other._dtype_code
962-
963-
return False
953+
return super().__eq__(other)
964954

965955
def __ne__(self, other: Any) -> bool:
966956
return not self.__eq__(other)

pandas/core/indexes/period.py

+4-15
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
)
2626

2727
from pandas.core.dtypes.common import is_integer
28-
from pandas.core.dtypes.dtypes import PeriodDtype
2928
from pandas.core.dtypes.generic import ABCSeries
3029
from pandas.core.dtypes.missing import is_valid_na_for_dtype
3130

@@ -52,6 +51,9 @@
5251
Self,
5352
npt,
5453
)
54+
55+
from pandas.core.dtypes.dtypes import PeriodDtype
56+
5557
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
5658
_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})
5759
_shared_doc_kwargs = {
@@ -314,20 +316,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
314316
"""
315317
Can we compare values of the given dtype to our own?
316318
"""
317-
if not isinstance(dtype, PeriodDtype):
318-
return False
319-
# For the subset of DateOffsets that can be a dtype.freq, it
320-
# suffices (and is much faster) to compare the dtype_code rather than
321-
# the freq itself.
322-
# See also: PeriodDtype.__eq__
323-
freq = dtype.freq
324-
own_freq = self.freq
325-
return (
326-
freq._period_dtype_code
327-
# error: "BaseOffset" has no attribute "_period_dtype_code"
328-
== own_freq._period_dtype_code # type: ignore[attr-defined]
329-
and freq.n == own_freq.n
330-
)
319+
return self.dtype == dtype
331320

332321
# ------------------------------------------------------------------------
333322
# Index Methods

0 commit comments

Comments
 (0)