Skip to content

Commit 47b7484

Browse files
committed
Cache hashing of categories in CategoricalDtype
1 parent abe7603 commit 47b7484

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,7 @@ Performance improvements
529529
- Performance improvement in :class:`Styler` where render times are more than 50% reduced (:issue:`39972` :issue:`39952`)
530530
- Performance improvement in :meth:`core.window.ewm.ExponentialMovingWindow.mean` with ``times`` (:issue:`39784`)
531531
- Performance improvement in :meth:`.GroupBy.apply` when requiring the python fallback implementation (:issue:`40176`)
532+
- Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`)
532533

533534
.. ---------------------------------------------------------------------------
534535

pandas/core/dtypes/dtypes.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import pytz
2222

2323
from pandas._libs.interval import Interval
24+
from pandas._libs.properties import cache_readonly
2425
from pandas._libs.tslibs import (
2526
BaseOffset,
2627
NaT,
@@ -360,7 +361,7 @@ def __hash__(self) -> int:
360361
else:
361362
return -2
362363
# We *do* want to include the real self.ordered here
363-
return int(self._hash_categories(self.categories, self.ordered))
364+
return int(self._hash_categories)
364365

365366
def __eq__(self, other: Any) -> bool:
366367
"""
@@ -434,14 +435,17 @@ def __repr__(self) -> str_type:
434435
data = data.rstrip(", ")
435436
return f"CategoricalDtype(categories={data}, ordered={self.ordered})"
436437

437-
@staticmethod
438-
def _hash_categories(categories, ordered: Ordered = True) -> int:
438+
@cache_readonly
439+
def _hash_categories(self) -> int:
439440
from pandas.core.util.hashing import (
440441
combine_hash_arrays,
441442
hash_array,
442443
hash_tuples,
443444
)
444445

446+
categories = self.categories
447+
ordered = self.ordered
448+
445449
if len(categories) and isinstance(categories[0], tuple):
446450
# assumes if any individual category is a tuple, then all are. ATM
447451
# I don't really want to support just some of the categories being

0 commit comments

Comments
 (0)