Skip to content

Commit d2a9a22

Browse files
authored
REF: avoid object-casting in _get_codes_for_values (#45117)
1 parent 58b6e06 commit d2a9a22

File tree

3 files changed, with 6 additions (+6) and 32 deletions (-32).

pandas/core/algorithms.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -298,7 +298,7 @@ def _get_values_for_rank(values: ArrayLike) -> np.ndarray:
298298
return values
299299

300300

301-
def get_data_algo(values: ArrayLike):
301+
def _get_data_algo(values: ArrayLike):
302302
values = _get_values_for_rank(values)
303303

304304
ndtype = _check_object_for_strings(values)
@@ -555,7 +555,7 @@ def factorize_array(
555555
codes : ndarray[np.intp]
556556
uniques : ndarray
557557
"""
558-
hash_klass, values = get_data_algo(values)
558+
hash_klass, values = _get_data_algo(values)
559559

560560
table = hash_klass(size_hint or len(values))
561561
uniques, codes = table.factorize(
@@ -1747,7 +1747,7 @@ def safe_sort(
17471747

17481748
if sorter is None:
17491749
# mixed types
1750-
hash_klass, values = get_data_algo(values)
1750+
hash_klass, values = _get_data_algo(values)
17511751
t = hash_klass(len(values))
17521752
t.map_locations(values)
17531753
sorter = ensure_platform_int(t.lookup(ordered))

pandas/core/arrays/categorical.py

+3 -28
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,6 @@
5555
)
5656
from pandas.core.dtypes.common import (
5757
ensure_int64,
58-
ensure_object,
5958
ensure_platform_int,
6059
is_categorical_dtype,
6160
is_datetime64_dtype,
@@ -96,7 +95,6 @@
9695
import pandas.core.algorithms as algorithms
9796
from pandas.core.algorithms import (
9897
factorize,
99-
get_data_algo,
10098
take_nd,
10199
unique1d,
102100
)
@@ -2760,8 +2758,6 @@ def _get_codes_for_values(values, categories: Index) -> np.ndarray:
27602758
27612759
If `values` is known to be a Categorical, use recode_for_categories instead.
27622760
"""
2763-
dtype_equal = is_dtype_equal(values.dtype, categories.dtype)
2764-
27652761
if values.ndim > 1:
27662762
flat = values.ravel()
27672763
codes = _get_codes_for_values(flat, categories)
@@ -2773,30 +2769,9 @@ def _get_codes_for_values(values, categories: Index) -> np.ndarray:
27732769
# Categorical(array[Period, Period], categories=PeriodIndex(...))
27742770
cls = categories.dtype.construct_array_type()
27752771
values = maybe_cast_to_extension_array(cls, values)
2776-
if not isinstance(values, cls):
2777-
# exception raised in _from_sequence
2778-
values = ensure_object(values)
2779-
# error: Incompatible types in assignment (expression has type
2780-
# "ndarray", variable has type "Index")
2781-
categories = ensure_object(categories) # type: ignore[assignment]
2782-
elif not dtype_equal:
2783-
values = ensure_object(values)
2784-
# error: Incompatible types in assignment (expression has type "ndarray",
2785-
# variable has type "Index")
2786-
categories = ensure_object(categories) # type: ignore[assignment]
2787-
2788-
if isinstance(categories, ABCIndex):
2789-
return coerce_indexer_dtype(categories.get_indexer_for(values), categories)
2790-
2791-
# Only hit here when we've already coerced to object dtypee.
2792-
2793-
hash_klass, vals = get_data_algo(values)
2794-
# pandas/core/arrays/categorical.py:2661: error: Argument 1 to "get_data_algo" has
2795-
# incompatible type "Index"; expected "Union[ExtensionArray, ndarray]" [arg-type]
2796-
_, cats = get_data_algo(categories) # type: ignore[arg-type]
2797-
t = hash_klass(len(cats))
2798-
t.map_locations(cats)
2799-
return coerce_indexer_dtype(t.lookup(vals), cats)
2772+
2773+
codes = categories.get_indexer_for(values)
2774+
return coerce_indexer_dtype(codes, categories)
28002775

28012776

28022777
def recode_for_categories(

pandas/tests/io/parser/dtypes/test_categorical.py

+0 -1
Original file line number | Diff line number | Diff line change
@@ -269,7 +269,6 @@ def test_categorical_coerces_timestamp(all_parsers):
269269
tm.assert_frame_equal(result, expected)
270270

271271

272-
@xfail_pyarrow
273272
def test_categorical_coerces_timedelta(all_parsers):
274273
parser = all_parsers
275274
dtype = {"b": CategoricalDtype(pd.to_timedelta(["1H", "2H", "3H"]))}

0 commit comments

Comments
 (0)