diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 819e5a1c32d9b..a4da249894084 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -108,9 +108,7 @@ # --------------- # # dtype access # # --------------- # -def _ensure_data( - values: ArrayLike, dtype: Optional[DtypeObj] = None -) -> Tuple[np.ndarray, DtypeObj]: +def _ensure_data(values: ArrayLike) -> Tuple[np.ndarray, DtypeObj]: """ routine to ensure that our data is of the correct input dtype for lower-level routines @@ -126,8 +124,6 @@ def _ensure_data( Parameters ---------- values : array-like - dtype : pandas_dtype, optional - coerce to this dtype Returns ------- @@ -135,34 +131,26 @@ def _ensure_data( pandas_dtype : np.dtype or ExtensionDtype """ - if dtype is not None: - # We only have non-None dtype when called from `isin`, and - # both Datetimelike and Categorical dispatch before getting here. - assert not needs_i8_conversion(dtype) - assert not is_categorical_dtype(dtype) - if not isinstance(values, ABCMultiIndex): # extract_array would raise values = extract_array(values, extract_numpy=True) # we check some simple dtypes first - if is_object_dtype(dtype): - return ensure_object(np.asarray(values)), np.dtype("object") - elif is_object_dtype(values) and dtype is None: + if is_object_dtype(values): return ensure_object(np.asarray(values)), np.dtype("object") try: - if is_bool_dtype(values) or is_bool_dtype(dtype): + if is_bool_dtype(values): # we are actually coercing to uint64 # until our algos support uint8 directly (see TODO) return np.asarray(values).astype("uint64"), np.dtype("bool") - elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype): + elif is_signed_integer_dtype(values): return ensure_int64(values), np.dtype("int64") - elif is_unsigned_integer_dtype(values) or is_unsigned_integer_dtype(dtype): + elif is_unsigned_integer_dtype(values): return ensure_uint64(values), np.dtype("uint64") - elif is_float_dtype(values) or is_float_dtype(dtype): + elif is_float_dtype(values): return ensure_float64(values), np.dtype("float64") - elif is_complex_dtype(values) or is_complex_dtype(dtype): + elif is_complex_dtype(values): # ignore the fact that we are casting to float # which discards complex parts @@ -177,12 +165,12 @@ def _ensure_data( return ensure_object(values), np.dtype("object") # datetimelike - if needs_i8_conversion(values.dtype) or needs_i8_conversion(dtype): - if is_period_dtype(values.dtype) or is_period_dtype(dtype): + if needs_i8_conversion(values.dtype): + if is_period_dtype(values.dtype): from pandas import PeriodIndex values = PeriodIndex(values)._data - elif is_timedelta64_dtype(values.dtype) or is_timedelta64_dtype(dtype): + elif is_timedelta64_dtype(values.dtype): from pandas import TimedeltaIndex values = TimedeltaIndex(values)._data @@ -202,9 +190,7 @@ def _ensure_data( dtype = values.dtype return values.asi8, dtype - elif is_categorical_dtype(values.dtype) and ( - is_categorical_dtype(dtype) or dtype is None - ): + elif is_categorical_dtype(values.dtype): values = cast("Categorical", values) values = values.codes dtype = pandas_dtype("category")