Skip to content

Commit d183436

Browse files
authored
BUG: algos.isin numeric vs datetimelike (#38020)
1 parent 2f1465f commit d183436

File tree

2 files changed

+54
-21
lines changed

2 files changed

+54
-21
lines changed

pandas/core/algorithms.py

+33-21
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,6 @@
4949
)
5050
from pandas.core.dtypes.generic import (
5151
ABCExtensionArray,
52-
ABCIndex,
5352
ABCIndexClass,
5453
ABCMultiIndex,
5554
ABCSeries,
@@ -69,7 +68,7 @@
6968
# dtype access #
7069
# --------------- #
7170
def _ensure_data(
72-
values, dtype: Optional[DtypeObj] = None
71+
values: ArrayLike, dtype: Optional[DtypeObj] = None
7372
) -> Tuple[np.ndarray, DtypeObj]:
7473
"""
7574
routine to ensure that our data is of the correct
@@ -95,6 +94,12 @@ def _ensure_data(
9594
pandas_dtype : np.dtype or ExtensionDtype
9695
"""
9796

97+
if dtype is not None:
98+
# We only have non-None dtype when called from `isin`, and
99+
# both Datetimelike and Categorical dispatch before getting here.
100+
assert not needs_i8_conversion(dtype)
101+
assert not is_categorical_dtype(dtype)
102+
98103
if not isinstance(values, ABCMultiIndex):
99104
# extract_array would raise
100105
values = extract_array(values, extract_numpy=True)
@@ -131,21 +136,20 @@ def _ensure_data(
131136
return ensure_object(values), np.dtype("object")
132137

133138
# datetimelike
134-
vals_dtype = getattr(values, "dtype", None)
135-
if needs_i8_conversion(vals_dtype) or needs_i8_conversion(dtype):
136-
if is_period_dtype(vals_dtype) or is_period_dtype(dtype):
139+
if needs_i8_conversion(values.dtype) or needs_i8_conversion(dtype):
140+
if is_period_dtype(values.dtype) or is_period_dtype(dtype):
137141
from pandas import PeriodIndex
138142

139-
values = PeriodIndex(values)
143+
values = PeriodIndex(values)._data
140144
dtype = values.dtype
141-
elif is_timedelta64_dtype(vals_dtype) or is_timedelta64_dtype(dtype):
145+
elif is_timedelta64_dtype(values.dtype) or is_timedelta64_dtype(dtype):
142146
from pandas import TimedeltaIndex
143147

144-
values = TimedeltaIndex(values)
148+
values = TimedeltaIndex(values)._data
145149
dtype = values.dtype
146150
else:
147151
# Datetime
148-
if values.ndim > 1 and is_datetime64_ns_dtype(vals_dtype):
152+
if values.ndim > 1 and is_datetime64_ns_dtype(values.dtype):
149153
# Avoid calling the DatetimeIndex constructor as it is 1D only
150154
# Note: this is reached by DataFrame.rank calls GH#27027
151155
# TODO(EA2D): special case not needed with 2D EAs
@@ -155,12 +159,12 @@ def _ensure_data(
155159

156160
from pandas import DatetimeIndex
157161

158-
values = DatetimeIndex(values)
162+
values = DatetimeIndex(values)._data
159163
dtype = values.dtype
160164

161165
return values.asi8, dtype
162166

163-
elif is_categorical_dtype(vals_dtype) and (
167+
elif is_categorical_dtype(values.dtype) and (
164168
is_categorical_dtype(dtype) or dtype is None
165169
):
166170
values = values.codes
@@ -237,11 +241,11 @@ def _ensure_arraylike(values):
237241
}
238242

239243

240-
def _get_hashtable_algo(values):
244+
def _get_hashtable_algo(values: np.ndarray):
241245
"""
242246
Parameters
243247
----------
244-
values : arraylike
248+
values : np.ndarray
245249
246250
Returns
247251
-------
@@ -255,15 +259,15 @@ def _get_hashtable_algo(values):
255259
return htable, values
256260

257261

258-
def _get_values_for_rank(values):
262+
def _get_values_for_rank(values: ArrayLike):
259263
if is_categorical_dtype(values):
260-
values = values._values_for_rank()
264+
values = cast("Categorical", values)._values_for_rank()
261265

262266
values, _ = _ensure_data(values)
263267
return values
264268

265269

266-
def get_data_algo(values):
270+
def get_data_algo(values: ArrayLike):
267271
values = _get_values_for_rank(values)
268272

269273
ndtype = _check_object_for_strings(values)
@@ -421,20 +425,28 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
421425
f"to isin(), you passed a [{type(values).__name__}]"
422426
)
423427

424-
if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
428+
if not isinstance(
429+
values, (ABCIndexClass, ABCSeries, ABCExtensionArray, np.ndarray)
430+
):
425431
values = construct_1d_object_array_from_listlike(list(values))
426432
# TODO: could use ensure_arraylike here
433+
elif isinstance(values, ABCMultiIndex):
434+
# Avoid raising in extract_array
435+
values = np.array(values)
427436

428437
comps = _ensure_arraylike(comps)
429438
comps = extract_array(comps, extract_numpy=True)
430-
if is_categorical_dtype(comps):
439+
if is_categorical_dtype(comps.dtype):
431440
# TODO(extension)
432441
# handle categoricals
433442
return cast("Categorical", comps).isin(values)
434443

435-
if needs_i8_conversion(comps):
444+
if needs_i8_conversion(comps.dtype):
436445
# Dispatch to DatetimeLikeArrayMixin.isin
437446
return array(comps).isin(values)
447+
elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps.dtype):
448+
# e.g. comps are integers and values are datetime64s
449+
return np.zeros(comps.shape, dtype=bool)
438450

439451
comps, dtype = _ensure_data(comps)
440452
values, _ = _ensure_data(values, dtype=dtype)
@@ -474,7 +486,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
474486

475487

476488
def factorize_array(
477-
values, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None
489+
values: np.ndarray, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None
478490
) -> Tuple[np.ndarray, np.ndarray]:
479491
"""
480492
Factorize an array-like to codes and uniques.
@@ -838,7 +850,7 @@ def value_counts_arraylike(values, dropna: bool):
838850
return keys, counts
839851

840852

841-
def duplicated(values, keep="first") -> np.ndarray:
853+
def duplicated(values: ArrayLike, keep: str = "first") -> np.ndarray:
842854
"""
843855
Return boolean ndarray denoting duplicate values.
844856

pandas/tests/test_algos.py

+21
Original file line number | Diff line number | Diff line change
@@ -842,6 +842,27 @@ def test_i8(self):
842842
expected = np.array([True, True, False])
843843
tm.assert_numpy_array_equal(result, expected)
844844

845+
@pytest.mark.parametrize("dtype1", ["m8[ns]", "M8[ns]", "M8[ns, UTC]", "period[D]"])
846+
@pytest.mark.parametrize("dtype", ["i8", "f8", "u8"])
847+
def test_isin_datetimelike_values_numeric_comps(self, dtype, dtype1):
848+
# Anything but object and we get all-False shortcut
849+
850+
dta = date_range("2013-01-01", periods=3)._values
851+
if dtype1 == "period[D]":
852+
# TODO: fix Series.view to get this on its own
853+
arr = dta.to_period("D")
854+
elif dtype1 == "M8[ns, UTC]":
855+
# TODO: fix Series.view to get this on its own
856+
arr = dta.tz_localize("UTC")
857+
else:
858+
arr = Series(dta.view("i8")).view(dtype1)._values
859+
860+
comps = arr.view("i8").astype(dtype)
861+
862+
result = algos.isin(comps, arr)
863+
expected = np.zeros(comps.shape, dtype=bool)
864+
tm.assert_numpy_array_equal(result, expected)
865+
845866
def test_large(self):
846867
s = date_range("20000101", periods=2000000, freq="s").values
847868
result = algos.isin(s, s[0:2])

0 commit comments

Comments
 (0)