Skip to content

Commit 0823ed4

Browse files
authored
BUG: isin incorrectly casting ints to datetimes (#37528)
* BUG: isin incorrectly casting ints to datetimes * GH ref * add asvs
1 parent 1e21130 commit 0823ed4

File tree

5 files changed

+115
-1
lines changed

5 files changed

+115
-1
lines changed

asv_bench/benchmarks/series_methods.py

+23-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from pandas import NaT, Series, date_range
5+
from pandas import Categorical, NaT, Series, date_range
66

77
from .pandas_vb_common import tm
88

@@ -36,6 +36,28 @@ def time_isin(self, dtypes):
3636
self.s.isin(self.values)
3737

3838

39+
class IsInDatetime64:
40+
def setup(self):
41+
dti = date_range(
42+
start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s"
43+
)
44+
self.ser = Series(dti)
45+
self.subset = self.ser._values[::3]
46+
self.cat_subset = Categorical(self.subset)
47+
48+
def time_isin(self):
49+
self.ser.isin(self.subset)
50+
51+
def time_isin_cat_values(self):
52+
self.ser.isin(self.cat_subset)
53+
54+
def time_isin_mismatched_dtype(self):
55+
self.ser.isin([1, 2])
56+
57+
def time_isin_empty(self):
58+
self.ser.isin([])
59+
60+
3961
class IsInFloat64:
4062
def setup(self):
4163
self.small = Series([1, 2], dtype=np.float64)

doc/source/whatsnew/v1.2.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,9 @@ Datetimelike
540540
- Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`)
541541
- Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`)
542542
- Bug in :func:`to_datetime` with a read-only array incorrectly raising (:issue:`34857`)
543+
- Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`DatetimeIndex.isin` incorrectly casting integers to datetimes (:issue:`36621`)
544+
- Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`DatetimeIndex.isin` failing to consider timezone-aware and timezone-naive datetimes as always different (:issue:`35728`)
545+
- Bug in :meth:`Series.isin` with ``PeriodDtype`` dtype and :meth:`PeriodIndex.isin` failing to consider arguments with different ``PeriodDtype`` as always different (:issue:`37528`)
543546

544547
Timedelta
545548
^^^^^^^^^

pandas/core/algorithms.py

+6
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,12 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
430430
# handle categoricals
431431
return cast("Categorical", comps).isin(values)
432432

433+
if needs_i8_conversion(comps):
434+
# Dispatch to DatetimeLikeIndexMixin.isin
435+
from pandas import Index
436+
437+
return Index(comps).isin(values)
438+
433439
comps, dtype = _ensure_data(comps)
434440
values, _ = _ensure_data(values, dtype=dtype)
435441

pandas/core/indexes/datetimelike.py

+28
Original file line numberDiff line numberDiff line change
@@ -521,12 +521,40 @@ def isin(self, values, level=None):
521521
if level is not None:
522522
self._validate_index_level(level)
523523

524+
if not hasattr(values, "dtype"):
525+
values = np.asarray(values)
526+
527+
if values.dtype.kind in ["f", "i", "u", "c"]:
528+
# TODO: de-duplicate with equals, validate_comparison_value
529+
return np.zeros(self.shape, dtype=bool)
530+
524531
if not isinstance(values, type(self)):
532+
inferrable = [
533+
"timedelta",
534+
"timedelta64",
535+
"datetime",
536+
"datetime64",
537+
"date",
538+
"period",
539+
]
540+
if values.dtype == object:
541+
inferred = lib.infer_dtype(values, skipna=False)
542+
if inferred not in inferrable:
543+
if "mixed" in inferred:
544+
return self.astype(object).isin(values)
545+
return np.zeros(self.shape, dtype=bool)
546+
525547
try:
526548
values = type(self)(values)
527549
except ValueError:
528550
return self.astype(object).isin(values)
529551

552+
try:
553+
self._data._check_compatible_with(values)
554+
except (TypeError, ValueError):
555+
# Includes tzawareness mismatch and IncompatibleFrequencyError
556+
return np.zeros(self.shape, dtype=bool)
557+
530558
return algorithms.isin(self.asi8, values.asi8)
531559

532560
def shift(self, periods=1, freq=None):

pandas/tests/series/methods/test_isin.py

+55
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pandas as pd
55
from pandas import Series, date_range
66
import pandas._testing as tm
7+
from pandas.core.arrays import PeriodArray
78

89

910
class TestSeriesIsIn:
@@ -90,6 +91,60 @@ def test_isin_read_only(self):
9091
expected = Series([True, True, True])
9192
tm.assert_series_equal(result, expected)
9293

94+
@pytest.mark.parametrize("dtype", [object, None])
95+
def test_isin_dt64_values_vs_ints(self, dtype):
96+
# GH#36621 don't cast integers to datetimes for isin
97+
dti = date_range("2013-01-01", "2013-01-05")
98+
ser = Series(dti)
99+
100+
comps = np.asarray([1356998400000000000], dtype=dtype)
101+
102+
res = dti.isin(comps)
103+
expected = np.array([False] * len(dti), dtype=bool)
104+
tm.assert_numpy_array_equal(res, expected)
105+
106+
res = ser.isin(comps)
107+
tm.assert_series_equal(res, Series(expected))
108+
109+
res = pd.core.algorithms.isin(ser, comps)
110+
tm.assert_numpy_array_equal(res, expected)
111+
112+
def test_isin_tzawareness_mismatch(self):
113+
dti = date_range("2013-01-01", "2013-01-05")
114+
ser = Series(dti)
115+
116+
other = dti.tz_localize("UTC")
117+
118+
res = dti.isin(other)
119+
expected = np.array([False] * len(dti), dtype=bool)
120+
tm.assert_numpy_array_equal(res, expected)
121+
122+
res = ser.isin(other)
123+
tm.assert_series_equal(res, Series(expected))
124+
125+
res = pd.core.algorithms.isin(ser, other)
126+
tm.assert_numpy_array_equal(res, expected)
127+
128+
def test_isin_period_freq_mismatch(self):
129+
dti = date_range("2013-01-01", "2013-01-05")
130+
pi = dti.to_period("M")
131+
ser = Series(pi)
132+
133+
# We construct a PeriodArray with the same i8 values
134+
# but different dtype
135+
dtype = dti.to_period("Y").dtype
136+
other = PeriodArray._simple_new(pi.asi8, dtype=dtype)
137+
138+
res = pi.isin(other)
139+
expected = np.array([False] * len(pi), dtype=bool)
140+
tm.assert_numpy_array_equal(res, expected)
141+
142+
res = ser.isin(other)
143+
tm.assert_series_equal(res, Series(expected))
144+
145+
res = pd.core.algorithms.isin(ser, other)
146+
tm.assert_numpy_array_equal(res, expected)
147+
93148

94149
@pytest.mark.slow
95150
def test_isin_large_series_mixed_dtypes_and_nan():

0 commit comments

Comments
 (0)