Skip to content

Commit 8f7673e

Browse files
authored
CLN: de-duplicate recode_for_categories (pandas-dev#37548)
1 parent a7c0494 commit 8f7673e

File tree

1 file changed

+23
-20
lines changed

1 file changed

+23
-20
lines changed

pandas/core/arrays/categorical.py

+23-20
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,9 @@ def func(self, other):
8383

8484
if not self.ordered and not self.categories.equals(other.categories):
8585
# both unordered and different order
86-
other_codes = _get_codes_for_values(other, self.categories)
86+
other_codes = recode_for_categories(
87+
other.codes, other.categories, self.categories, copy=False
88+
)
8789
else:
8890
other_codes = other._codes
8991

@@ -354,9 +356,7 @@ def __init__(
354356
dtype = CategoricalDtype(categories, dtype.ordered)
355357

356358
elif is_categorical_dtype(values.dtype):
357-
old_codes = (
358-
values._values.codes if isinstance(values, ABCSeries) else values.codes
359-
)
359+
old_codes = extract_array(values).codes
360360
codes = recode_for_categories(
361361
old_codes, values.dtype.categories, dtype.categories
362362
)
@@ -1706,17 +1706,9 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
17061706
# Indexing on codes is more efficient if categories are the same,
17071707
# so we can apply some optimizations based on the degree of
17081708
# dtype-matching.
1709-
if self.categories.equals(target.categories):
1710-
# We use the same codes, so can go directly to the engine
1711-
codes = target.codes
1712-
elif self.is_dtype_equal(target):
1713-
# We have the same categories up to a reshuffling of codes.
1714-
codes = recode_for_categories(
1715-
target.codes, target.categories, self.categories
1716-
)
1717-
else:
1718-
code_indexer = self.categories.get_indexer(target.categories)
1719-
codes = take_1d(code_indexer, target.codes, fill_value=-1)
1709+
codes = recode_for_categories(
1710+
target.codes, target.categories, self.categories, copy=False
1711+
)
17201712
else:
17211713
codes = self.categories.get_indexer(target)
17221714

@@ -2472,9 +2464,11 @@ def _delegate_method(self, name, *args, **kwargs):
24722464
# utility routines
24732465

24742466

2475-
def _get_codes_for_values(values, categories):
2467+
def _get_codes_for_values(values, categories) -> np.ndarray:
24762468
"""
24772469
utility routine to turn values into codes given the specified categories
2470+
2471+
If `values` is known to be a Categorical, use recode_for_categories instead.
24782472
"""
24792473
dtype_equal = is_dtype_equal(values.dtype, categories.dtype)
24802474

@@ -2504,14 +2498,18 @@ def _get_codes_for_values(values, categories):
25042498
return coerce_indexer_dtype(t.lookup(vals), cats)
25052499

25062500

2507-
def recode_for_categories(codes: np.ndarray, old_categories, new_categories):
2501+
def recode_for_categories(
2502+
codes: np.ndarray, old_categories, new_categories, copy: bool = True
2503+
) -> np.ndarray:
25082504
"""
25092505
Convert a set of codes to a new set of categories
25102506
25112507
Parameters
25122508
----------
25132509
codes : np.ndarray
25142510
old_categories, new_categories : Index
2511+
copy: bool, default True
2512+
Whether to copy if the codes are unchanged.
25152513
25162514
Returns
25172515
-------
@@ -2527,14 +2525,19 @@ def recode_for_categories(codes: np.ndarray, old_categories, new_categories):
25272525
"""
25282526
if len(old_categories) == 0:
25292527
# All null anyway, so just retain the nulls
2530-
return codes.copy()
2528+
if copy:
2529+
return codes.copy()
2530+
return codes
25312531
elif new_categories.equals(old_categories):
25322532
# Same categories, so no need to actually recode
2533-
return codes.copy()
2533+
if copy:
2534+
return codes.copy()
2535+
return codes
2536+
25342537
indexer = coerce_indexer_dtype(
25352538
new_categories.get_indexer(old_categories), new_categories
25362539
)
2537-
new_codes = take_1d(indexer, codes.copy(), fill_value=-1)
2540+
new_codes = take_1d(indexer, codes, fill_value=-1)
25382541
return new_codes
25392542

25402543

0 commit comments

Comments
 (0)