@@ -1694,9 +1694,8 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
1694
1694
# Indexing on codes is more efficient if categories are the same,
1695
1695
# so we can apply some optimizations based on the degree of
1696
1696
# dtype-matching.
1697
- codes = recode_for_categories (
1698
- target .codes , target .categories , self .categories , copy = False
1699
- )
1697
+ cat = self .encode_with_my_categories (target )
1698
+ codes = cat ._codes
1700
1699
else :
1701
1700
codes = self .categories .get_indexer (target )
1702
1701
@@ -1868,8 +1867,8 @@ def _validate_setitem_value(self, value):
1868
1867
"without identical categories"
1869
1868
)
1870
1869
# is_dtype_equal implies categories_match_up_to_permutation
1871
- new_codes = self ._validate_listlike (value )
1872
- value = Categorical . from_codes ( new_codes , dtype = self . dtype )
1870
+ value = self .encode_with_my_categories (value )
1871
+ return value . _codes
1873
1872
1874
1873
# wrap scalars and hashable-listlikes in list
1875
1874
rvalue = value if not is_hashable (value ) else [value ]
@@ -2101,8 +2100,8 @@ def equals(self, other: object) -> bool:
2101
2100
if not isinstance (other , Categorical ):
2102
2101
return False
2103
2102
elif self ._categories_match_up_to_permutation (other ):
2104
- other_codes = self ._validate_listlike (other )
2105
- return np .array_equal (self ._codes , other_codes )
2103
+ other = self .encode_with_my_categories (other )
2104
+ return np .array_equal (self ._codes , other . _codes )
2106
2105
return False
2107
2106
2108
2107
@classmethod
@@ -2113,6 +2112,23 @@ def _concat_same_type(self, to_concat):
2113
2112
2114
2113
# ------------------------------------------------------------------
2115
2114
2115
+ def encode_with_my_categories (self , other : "Categorical" ) -> "Categorical" :
2116
+ """
2117
+ Re-encode another categorical using this Categorical's categories.
2118
+
2119
+ Notes
2120
+ -----
2121
+ This assumes we have already checked
2122
+ self._categories_match_up_to_permutation(other).
2123
+ """
2124
+ # Recode via codes rather than values: indexing on codes is more
2125
+ # efficient when the categories are the same. copy=False is forwarded
2126
+ # to recode_for_categories; the result is wrapped by _from_backing_data.
2127
+ codes = recode_for_categories (
2128
+ other .codes , other .categories , self .categories , copy = False
2129
+ )
2130
+ return self ._from_backing_data (codes )
2131
+
2116
2132
def _categories_match_up_to_permutation (self , other : "Categorical" ) -> bool :
2117
2133
"""
2118
2134
Returns True if categoricals are the same dtype
0 commit comments