Skip to content

PERF: PeriodDtype hash and eq #52336

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,6 @@ cdef enum PeriodDtypeCode:
cdef class PeriodDtypeBase:
cdef readonly:
PeriodDtypeCode _dtype_code
int64_t _n

cpdef int _get_to_timestamp_base(self)
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,18 @@ def abbrev_to_npy_unit(abbrev: str) -> int: ...

class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode
_n: int

# actually __cinit__
def __new__(cls, code: int): ...
def __new__(cls, code: int, n: int): ...
@property
def _freq_group_code(self) -> int: ...
@property
def _resolution_obj(self) -> Resolution: ...
def _get_to_timestamp_base(self) -> int: ...
@property
def _freqstr(self) -> str: ...
def __hash__(self) -> int: ...

class FreqGroup(Enum):
FR_ANN: int
Expand Down
9 changes: 7 additions & 2 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,22 @@ cdef class PeriodDtypeBase:
"""
# cdef readonly:
# PeriodDtypeCode _dtype_code
# int64_t _n

def __cinit__(self, PeriodDtypeCode code):
def __cinit__(self, PeriodDtypeCode code, int64_t n):
self._dtype_code = code
self._n = n

def __eq__(self, other):
if not isinstance(other, PeriodDtypeBase):
return False
if not isinstance(self, PeriodDtypeBase):
# cython semantics, this is a reversed op
return False
return self._dtype_code == other._dtype_code
return self._dtype_code == other._dtype_code and self._n == other._n

def __hash__(self) -> int:
return hash((self._n, self._dtype_code))

@property
def _freq_group_code(self) -> int:
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1671,7 +1671,7 @@ cdef class _Period(PeriodMixin):
# Note: this is more performant than PeriodDtype.from_date_offset(freq)
# because from_date_offset cannot be made a cdef method (until cython
# supports cdef classmethods)
self._dtype = PeriodDtypeBase(freq._period_dtype_code)
self._dtype = PeriodDtypeBase(freq._period_dtype_code, freq.n)

@classmethod
def _maybe_convert_freq(cls, object freq) -> BaseOffset:
Expand All @@ -1686,7 +1686,7 @@ cdef class _Period(PeriodMixin):
"""
if isinstance(freq, int):
# We already have a dtype code
dtype = PeriodDtypeBase(freq)
dtype = PeriodDtypeBase(freq, 1)
freq = dtype._freqstr

freq = to_offset(freq)
Expand Down
16 changes: 3 additions & 13 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,7 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype):
_metadata = ("freq",)
_match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
_cache_dtypes: dict[str_type, PandasExtensionDtype] = {}
__hash__ = PeriodDtypeBase.__hash__

def __new__(cls, freq):
"""
Expand All @@ -879,7 +880,7 @@ def __new__(cls, freq):
return cls._cache_dtypes[freq.freqstr]
except KeyError:
dtype_code = freq._period_dtype_code
u = PeriodDtypeBase.__new__(cls, dtype_code)
u = PeriodDtypeBase.__new__(cls, dtype_code, freq.n)
u._freq = freq
cls._cache_dtypes[freq.freqstr] = u
return u
Expand Down Expand Up @@ -945,22 +946,11 @@ def name(self) -> str_type:
def na_value(self) -> NaTType:
return NaT

def __hash__(self) -> int:
# make myself hashable
return hash(str(self))

def __eq__(self, other: Any) -> bool:
if isinstance(other, str):
return other in [self.name, self.name.title()]

elif isinstance(other, PeriodDtype):
# For freqs that can be held by a PeriodDtype, this check is
# equivalent to (and much faster than) self.freq == other.freq
sfreq = self._freq
ofreq = other._freq
return sfreq.n == ofreq.n and self._dtype_code == other._dtype_code

return False
return super().__eq__(other)

def __ne__(self, other: Any) -> bool:
return not self.__eq__(other)
Expand Down
19 changes: 4 additions & 15 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
)

from pandas.core.dtypes.common import is_integer
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import is_valid_na_for_dtype

Expand All @@ -52,6 +51,9 @@
Self,
npt,
)

from pandas.core.dtypes.dtypes import PeriodDtype

_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})
_shared_doc_kwargs = {
Expand Down Expand Up @@ -314,20 +316,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
"""
if not isinstance(dtype, PeriodDtype):
return False
# For the subset of DateOffsets that can be a dtype.freq, it
# suffices (and is much faster) to compare the dtype_code rather than
# the freq itself.
# See also: PeriodDtype.__eq__
freq = dtype.freq
own_freq = self.freq
return (
freq._period_dtype_code
# error: "BaseOffset" has no attribute "_period_dtype_code"
== own_freq._period_dtype_code # type: ignore[attr-defined]
and freq.n == own_freq.n
)
return self.dtype == dtype

# ------------------------------------------------------------------------
# Index Methods
Expand Down