Skip to content

Commit 9fdd0b8

Browse files
committed
Cache hashing of categories in CategoricalDtype
1 parent cf30dc7 commit 9fdd0b8

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ Performance improvements
567567
- Performance improvement in :class:`Styler` where render times are more than 50% reduced (:issue:`39972` :issue:`39952`)
568568
- Performance improvement in :meth:`core.window.ewm.ExponentialMovingWindow.mean` with ``times`` (:issue:`39784`)
569569
- Performance improvement in :meth:`.GroupBy.apply` when requiring the python fallback implementation (:issue:`40176`)
570+
- Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`)
570571

571572
.. ---------------------------------------------------------------------------
572573

pandas/core/dtypes/dtypes.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import pytz
1616

1717
from pandas._libs.interval import Interval
18+
from pandas._libs.properties import cache_readonly
1819
from pandas._libs.tslibs import (
1920
BaseOffset,
2021
NaT,
@@ -355,7 +356,7 @@ def __hash__(self) -> int:
355356
else:
356357
return -2
357358
# We *do* want to include the real self.ordered here
358-
return int(self._hash_categories(self.categories, self.ordered))
359+
return int(self._hash_categories)
359360

360361
def __eq__(self, other: Any) -> bool:
361362
"""
@@ -429,14 +430,17 @@ def __repr__(self) -> str_type:
429430
data = data.rstrip(", ")
430431
return f"CategoricalDtype(categories={data}, ordered={self.ordered})"
431432

432-
@staticmethod
433-
def _hash_categories(categories, ordered: Ordered = True) -> int:
433+
@cache_readonly
434+
def _hash_categories(self) -> int:
434435
from pandas.core.util.hashing import (
435436
combine_hash_arrays,
436437
hash_array,
437438
hash_tuples,
438439
)
439440

441+
categories = self.categories
442+
ordered = self.ordered
443+
440444
if len(categories) and isinstance(categories[0], tuple):
441445
# assumes if any individual category is a tuple, then all are. ATM
442446
# I don't really want to support just some of the categories being

0 commit comments

Comments
 (0)