Skip to content

Commit 98dcdf8

Browse files
committed
perf: Cache hashing of categories
1 parent 1abbbcf commit 98dcdf8

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

pandas/core/dtypes/dtypes.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@ def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> Non
343343

344344
self._categories = categories
345345
self._ordered = ordered
346+
self._hash_cache = -10 # Sentinel: no hash cached yet. -1 and -2 are used below with their own semantics
346347

347348
def __setstate__(self, state: MutableMapping[str_type, Any]) -> None:
348349
# for pickle compat. __get_state__ is defined in the
@@ -352,6 +353,8 @@ def __setstate__(self, state: MutableMapping[str_type, Any]) -> None:
352353
self._ordered = state.pop("ordered", False)
353354

354355
def __hash__(self) -> int:
356+
if self._hash_cache >= 0:
357+
return self._hash_cache
355358
# _hash_categories returns a uint64, so use the negative
356359
# space for when we have unknown categories to avoid a conflict
357360
if self.categories is None:
@@ -360,7 +363,8 @@ def __hash__(self) -> int:
360363
else:
361364
return -2
362365
# We *do* want to include the real self.ordered here
363-
return int(self._hash_categories(self.categories, self.ordered))
366+
self._hash_cache = int(self._hash_categories(self.categories, self.ordered))
367+
return self._hash_cache
364368

365369
def __eq__(self, other: Any) -> bool:
366370
"""

0 commit comments

Comments
 (0)