diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 3b65bccd48aee..2db46abca119c 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -2,7 +2,7 @@ import numpy as np -from pandas import NaT, Series, date_range +from pandas import Categorical, NaT, Series, date_range from .pandas_vb_common import tm @@ -36,6 +36,28 @@ def time_isin(self, dtypes): self.s.isin(self.values) +class IsInDatetime64: + def setup(self): + dti = date_range( + start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s" + ) + self.ser = Series(dti) + self.subset = self.ser._values[::3] + self.cat_subset = Categorical(self.subset) + + def time_isin(self): + self.ser.isin(self.subset) + + def time_isin_cat_values(self): + self.ser.isin(self.cat_subset) + + def time_isin_mismatched_dtype(self): + self.ser.isin([1, 2]) + + def time_isin_empty(self): + self.ser.isin([]) + + class IsInFloat64: def setup(self): self.small = Series([1, 2], dtype=np.float64) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index bc5229d4b4296..0222fe306585f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -538,6 +538,9 @@ Datetimelike - Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`) - Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`) - Bug in :func:`to_datetime` with a read-only array incorrectly raising (:issue:`34857`) +- Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`DatetimeIndex.isin` incorrectly casting integers to datetimes (:issue:`36621`) +- Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`DatetimeIndex.isin` failing to consider timezone-aware and timezone-naive datetimes as always different (:issue:`35728`) +- Bug in :meth:`Series.isin` with ``PeriodDtype`` dtype and :meth:`PeriodIndex.isin` failing to consider arguments with different ``PeriodDtype`` as always different (:issue:`37528`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ca878d3293c57..be091314e6c25 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -430,6 +430,12 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: # handle categoricals return cast("Categorical", comps).isin(values) + if needs_i8_conversion(comps): + # Dispatch to DatetimeLikeIndexMixin.isin + from pandas import Index + + return Index(comps).isin(values) + comps, dtype = _ensure_data(comps) values, _ = _ensure_data(values, dtype=dtype) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f0b37b810b28a..48db6051b186d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -491,12 +491,40 @@ def isin(self, values, level=None): if level is not None: self._validate_index_level(level) + if not hasattr(values, "dtype"): + values = np.asarray(values) + + if values.dtype.kind in ["f", "i", "u", "c"]: + # TODO: de-duplicate with equals, validate_comparison_value + return np.zeros(self.shape, dtype=bool) + if not isinstance(values, type(self)): + inferrable = [ + "timedelta", + "timedelta64", + "datetime", + "datetime64", + "date", + "period", + ] + if values.dtype == object: + inferred = lib.infer_dtype(values, skipna=False) + if inferred not in inferrable: + if "mixed" in inferred: + return self.astype(object).isin(values) + return np.zeros(self.shape, dtype=bool) + try: values = type(self)(values) except ValueError: return self.astype(object).isin(values) + try: + self._data._check_compatible_with(values) + except (TypeError, ValueError): + # Includes tzawareness mismatch and IncompatibleFrequencyError + return np.zeros(self.shape, dtype=bool) + return algorithms.isin(self.asi8, values.asi8) @Appender(Index.where.__doc__) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 86ea2b2f02a4d..071b1f3f75f44 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -4,6 +4,7 @@ import pandas as pd from pandas import Series, date_range import pandas._testing as tm +from pandas.core.arrays import PeriodArray class TestSeriesIsIn: @@ -90,6 +91,60 @@ def test_isin_read_only(self): expected = Series([True, True, True]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("dtype", [object, None]) + def test_isin_dt64_values_vs_ints(self, dtype): + # GH#36621 dont cast integers to datetimes for isin + dti = date_range("2013-01-01", "2013-01-05") + ser = Series(dti) + + comps = np.asarray([1356998400000000000], dtype=dtype) + + res = dti.isin(comps) + expected = np.array([False] * len(dti), dtype=bool) + tm.assert_numpy_array_equal(res, expected) + + res = ser.isin(comps) + tm.assert_series_equal(res, Series(expected)) + + res = pd.core.algorithms.isin(ser, comps) + tm.assert_numpy_array_equal(res, expected) + + def test_isin_tzawareness_mismatch(self): + dti = date_range("2013-01-01", "2013-01-05") + ser = Series(dti) + + other = dti.tz_localize("UTC") + + res = dti.isin(other) + expected = np.array([False] * len(dti), dtype=bool) + tm.assert_numpy_array_equal(res, expected) + + res = ser.isin(other) + tm.assert_series_equal(res, Series(expected)) + + res = pd.core.algorithms.isin(ser, other) + tm.assert_numpy_array_equal(res, expected) + + def test_isin_period_freq_mismatch(self): + dti = date_range("2013-01-01", "2013-01-05") + pi = dti.to_period("M") + ser = Series(pi) + + # We construct another PeriodIndex with the same i8 values + # but different dtype + dtype = dti.to_period("Y").dtype + other = PeriodArray._simple_new(pi.asi8, dtype=dtype) + + res = pi.isin(other) + expected = np.array([False] * len(pi), dtype=bool) + tm.assert_numpy_array_equal(res, expected) + + res = ser.isin(other) + tm.assert_series_equal(res, Series(expected)) + + res = pd.core.algorithms.isin(ser, other) + tm.assert_numpy_array_equal(res, expected) + @pytest.mark.slow def test_isin_large_series_mixed_dtypes_and_nan():