|
21 | 21 | import pytz
|
22 | 22 |
|
23 | 23 | from pandas._libs.interval import Interval
|
| 24 | +from pandas._libs.properties import cache_readonly |
24 | 25 | from pandas._libs.tslibs import (
|
25 | 26 | BaseOffset,
|
26 | 27 | NaT,
|
@@ -351,17 +352,6 @@ def __setstate__(self, state: MutableMapping[str_type, Any]) -> None:
|
351 | 352 | self._categories = state.pop("categories", None)
|
352 | 353 | self._ordered = state.pop("ordered", False)
|
353 | 354 |
|
354 |
| - def __hash__(self) -> int: |
355 |
| - # _hash_categories returns a uint64, so use the negative |
356 |
| - # space for when we have unknown categories to avoid a conflict |
357 |
| - if self.categories is None: |
358 |
| - if self.ordered: |
359 |
| - return -1 |
360 |
| - else: |
361 |
| - return -2 |
362 |
| - # We *do* want to include the real self.ordered here |
363 |
| - return int(self._hash_categories(self.categories, self.ordered)) |
364 |
| - |
365 | 355 | def __eq__(self, other: Any) -> bool:
|
366 | 356 | """
|
367 | 357 | Rules for CDT equality:
|
@@ -434,14 +424,29 @@ def __repr__(self) -> str_type:
|
434 | 424 | data = data.rstrip(", ")
|
435 | 425 | return f"CategoricalDtype(categories={data}, ordered={self.ordered})"
|
436 | 426 |
|
437 |
| - @staticmethod |
438 |
| - def _hash_categories(categories, ordered: Ordered = True) -> int: |
def __hash__(self) -> int:
    """
    Return the hash of this dtype.

    Returns
    -------
    int
        ``self._hash_categories`` when the categories are known; otherwise
        a negative sentinel: -1 if ``self.ordered`` is truthy, else -2.
    """
    if self.categories is not None:
        # Go through the property rather than computing inline, so the
        # value can be cached on the instance.
        return self._hash_categories

    # _hash_categories returns a uint64, so the negative space is free to
    # mark "unknown categories" without conflicting with a real hash.
    return -1 if self.ordered else -2
| 437 | + |
| 438 | + @cache_readonly |
| 439 | + def _hash_categories(self) -> int: |
439 | 440 | from pandas.core.util.hashing import (
|
440 | 441 | combine_hash_arrays,
|
441 | 442 | hash_array,
|
442 | 443 | hash_tuples,
|
443 | 444 | )
|
444 | 445 |
|
| 446 | + # We *do* want to include the real self.ordered here |
| 447 | + categories = self.categories |
| 448 | + ordered = self.ordered |
| 449 | + |
445 | 450 | if len(categories) and isinstance(categories[0], tuple):
|
446 | 451 | # assumes if any individual category is a tuple, then all are. ATM
|
447 | 452 | # I don't really want to support just some of the categories being
|
@@ -469,7 +474,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int:
|
469 | 474 | else:
|
470 | 475 | cat_array = [cat_array]
|
471 | 476 | hashed = combine_hash_arrays(iter(cat_array), num_items=len(cat_array))
|
472 |
| - return np.bitwise_xor.reduce(hashed) |
| 477 | + return int(np.bitwise_xor.reduce(hashed)) |
473 | 478 |
|
474 | 479 | @classmethod
|
475 | 480 | def construct_array_type(cls) -> Type[Categorical]:
|
|
0 commit comments