Skip to content

Commit eb38bb7

Browse files
jbrockmendel authored and Kevin D Smith committed
PERF: CategoricalDtype.__eq__ (pandas-dev#36280)
1 parent 09e5238 commit eb38bb7

File tree

1 file changed

+22
-4
lines changed

1 file changed

+22
-4
lines changed

pandas/core/dtypes/dtypes.py

+22 -4
Original file line number | Diff line number | Diff line change
@@ -375,12 +375,30 @@ def __eq__(self, other: Any) -> bool:
375375
# but same order is not necessary. There is no distinction between
376376
# ordered=False and ordered=None: CDT(., False) and CDT(., None)
377377
# will be equal if they have the same categories.
378-
if (
379-
self.categories.dtype == other.categories.dtype
380-
and self.categories.equals(other.categories)
381-
):
378+
left = self.categories
379+
right = other.categories
380+
381+
# GH#36280 the ordering of checks here is for performance
382+
if not left.dtype == right.dtype:
383+
return False
384+
385+
if len(left) != len(right):
386+
return False
387+
388+
if self.categories.equals(other.categories):
382389
# Check and see if they happen to be identical categories
383390
return True
391+
392+
if left.dtype != object:
393+
# Faster than calculating hash
394+
indexer = left.get_indexer(right)
395+
# Because left and right have the same length and are unique,
396+
# `indexer` not having any -1s implies that there is a
397+
# bijection between `left` and `right`.
398+
return (indexer != -1).all()
399+
400+
# With object-dtype we need a comparison that identifies
401+
# e.g. int(2) as distinct from float(2)
384402
return hash(self) == hash(other)
385403

386404
def __repr__(self) -> str_type:

0 commit comments

Comments
 (0)