Skip to content

Commit 0d7a41b

Browse files
committed
REF: implement Categorical.encode_with_my_categories
1 parent 31db14c commit 0d7a41b

File tree

2 files changed

+24
-8
lines changed

2 files changed

+24
-8
lines changed

pandas/core/arrays/categorical.py

Lines changed: 23 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1694,9 +1694,8 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
1694 1694
# Indexing on codes is more efficient if categories are the same,
1695 1695
# so we can apply some optimizations based on the degree of
1696 1696
# dtype-matching.
1697-
codes = recode_for_categories(
1698-
target.codes, target.categories, self.categories, copy=False
1699-
)
1697+
cat = self.encode_with_my_categories(target)
1698+
codes = cat._codes
1700 1699
else:
1701 1700
codes = self.categories.get_indexer(target)
1702 1701

@@ -1868,8 +1867,8 @@ def _validate_setitem_value(self, value):
1868 1867
"without identical categories"
1869 1868
)
1870 1869
# is_dtype_equal implies categories_match_up_to_permutation
1871-
new_codes = self._validate_listlike(value)
1872-
value = Categorical.from_codes(new_codes, dtype=self.dtype)
1870+
value = self.encode_with_my_categories(value)
1871+
return value._codes
1873 1872

1874 1873
# wrap scalars and hashable-listlikes in list
1875 1874
rvalue = value if not is_hashable(value) else [value]
@@ -2101,8 +2100,8 @@ def equals(self, other: object) -> bool:
2101 2100
if not isinstance(other, Categorical):
2102 2101
return False
2103 2102
elif self._categories_match_up_to_permutation(other):
2104-
other_codes = self._validate_listlike(other)
2105-
return np.array_equal(self._codes, other_codes)
2103+
other = self.encode_with_my_categories(other)
2104+
return np.array_equal(self._codes, other._codes)
2106 2105
return False
2107 2106

2108 2107
@classmethod
@@ -2113,6 +2112,23 @@ def _concat_same_type(self, to_concat):
2113 2112

2114 2113
# ------------------------------------------------------------------
2115 2114

2115+
def encode_with_my_categories(self, other: "Categorical") -> "Categorical":
2116+
"""
2117+
Re-encode another categorical using this Categorical's categories.
2118+
2119+
Notes
2120+
-----
2121+
This assumes we have already checked
2122+
self._categories_match_up_to_permutation(other).
2123+
"""
2124+
# Indexing on codes is more efficient if categories are the same,
2125+
# so we can apply some optimizations based on the degree of
2126+
# dtype-matching.
2127+
codes = recode_for_categories(
2128+
other.codes, other.categories, self.categories, copy=False
2129+
)
2130+
return self._from_backing_data(codes)
2131+
2116 2132
def _categories_match_up_to_permutation(self, other: "Categorical") -> bool:
2117 2133
"""
2118 2134
Returns True if categoricals are the same dtype

pandas/core/dtypes/concat.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -301,7 +301,7 @@ def _maybe_unwrap(x):
301 301
categories = first.categories
302 302
ordered = first.ordered
303 303

304-
all_codes = [first._validate_listlike(x) for x in to_union]
304+
all_codes = [first.encode_with_my_categories(x)._codes for x in to_union]
305 305
new_codes = np.concatenate(all_codes)
306 306

307 307
if sort_categories and not ignore_order and ordered:

0 commit comments

Comments
 (0)