@@ -83,7 +83,9 @@ def func(self, other):
83
83
84
84
if not self .ordered and not self .categories .equals (other .categories ):
85
85
# both unordered and different order
86
- other_codes = _get_codes_for_values (other , self .categories )
86
+ other_codes = recode_for_categories (
87
+ other .codes , other .categories , self .categories , copy = False
88
+ )
87
89
else :
88
90
other_codes = other ._codes
89
91
@@ -354,9 +356,7 @@ def __init__(
354
356
dtype = CategoricalDtype (categories , dtype .ordered )
355
357
356
358
elif is_categorical_dtype (values .dtype ):
357
- old_codes = (
358
- values ._values .codes if isinstance (values , ABCSeries ) else values .codes
359
- )
359
+ old_codes = extract_array (values ).codes
360
360
codes = recode_for_categories (
361
361
old_codes , values .dtype .categories , dtype .categories
362
362
)
@@ -1706,17 +1706,9 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
1706
1706
# Indexing on codes is more efficient if categories are the same,
1707
1707
# so we can apply some optimizations based on the degree of
1708
1708
# dtype-matching.
1709
- if self .categories .equals (target .categories ):
1710
- # We use the same codes, so can go directly to the engine
1711
- codes = target .codes
1712
- elif self .is_dtype_equal (target ):
1713
- # We have the same categories up to a reshuffling of codes.
1714
- codes = recode_for_categories (
1715
- target .codes , target .categories , self .categories
1716
- )
1717
- else :
1718
- code_indexer = self .categories .get_indexer (target .categories )
1719
- codes = take_1d (code_indexer , target .codes , fill_value = - 1 )
1709
+ codes = recode_for_categories (
1710
+ target .codes , target .categories , self .categories , copy = False
1711
+ )
1720
1712
else :
1721
1713
codes = self .categories .get_indexer (target )
1722
1714
@@ -2472,9 +2464,11 @@ def _delegate_method(self, name, *args, **kwargs):
2472
2464
# utility routines
2473
2465
2474
2466
2475
- def _get_codes_for_values (values , categories ):
2467
+ def _get_codes_for_values (values , categories ) -> np . ndarray :
2476
2468
"""
2477
2469
utility routine to turn values into codes given the specified categories
2470
+
2471
+ If `values` is known to be a Categorical, use recode_for_categories instead.
2478
2472
"""
2479
2473
dtype_equal = is_dtype_equal (values .dtype , categories .dtype )
2480
2474
@@ -2504,14 +2498,18 @@ def _get_codes_for_values(values, categories):
2504
2498
return coerce_indexer_dtype (t .lookup (vals ), cats )
2505
2499
2506
2500
2507
- def recode_for_categories (codes : np .ndarray , old_categories , new_categories ):
2501
+ def recode_for_categories (
2502
+ codes : np .ndarray , old_categories , new_categories , copy : bool = True
2503
+ ) -> np .ndarray :
2508
2504
"""
2509
2505
Convert a set of codes to a new set of categories
2510
2506
2511
2507
Parameters
2512
2508
----------
2513
2509
codes : np.ndarray
2514
2510
old_categories, new_categories : Index
2511
+ copy : bool, default True
2512
+ Whether to copy if the codes are unchanged.
2515
2513
2516
2514
Returns
2517
2515
-------
@@ -2527,14 +2525,19 @@ def recode_for_categories(codes: np.ndarray, old_categories, new_categories):
2527
2525
"""
2528
2526
if len (old_categories ) == 0 :
2529
2527
# All null anyway, so just retain the nulls
2530
- return codes .copy ()
2528
+ if copy :
2529
+ return codes .copy ()
2530
+ return codes
2531
2531
elif new_categories .equals (old_categories ):
2532
2532
# Same categories, so no need to actually recode
2533
- return codes .copy ()
2533
+ if copy :
2534
+ return codes .copy ()
2535
+ return codes
2536
+
2534
2537
indexer = coerce_indexer_dtype (
2535
2538
new_categories .get_indexer (old_categories ), new_categories
2536
2539
)
2537
- new_codes = take_1d (indexer , codes . copy () , fill_value = - 1 )
2540
+ new_codes = take_1d (indexer , codes , fill_value = - 1 )
2538
2541
return new_codes
2539
2542
2540
2543
0 commit comments