diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 92124a536fe26..db46b82ab90ac 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -301,6 +301,7 @@ Performance improvements - Performance improvement in :class:`Series` reductions (:issue:`52341`) - Performance improvement in :func:`concat` when ``axis=1`` and objects have different indexes (:issue:`52541`) - Performance improvement in :meth:`.DataFrameGroupBy.groups` (:issue:`53088`) +- Performance improvement in :meth:`DataFrame.isin` for extension dtypes (:issue:`53514`) - Performance improvement in :meth:`DataFrame.loc` when selecting rows and columns (:issue:`53014`) - Performance improvement in :meth:`Series.add` for pyarrow string and binary dtypes (:issue:`53150`) - Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`) @@ -310,6 +311,7 @@ Performance improvements - Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`52525`) - Performance improvement when doing various reshaping operations on :class:`arrays.IntegerArrays` & :class:`arrays.FloatingArray` by avoiding doing unnecessary validation (:issue:`53013`) - Performance improvement when indexing with pyarrow timestamp and duration dtypes (:issue:`53368`) +- .. --------------------------------------------------------------------------- .. _whatsnew_210.bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d4c2124182ea5..1524197938a81 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11731,15 +11731,21 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: "to be passed to DataFrame.isin(), " f"you passed a '{type(values).__name__}'" ) - # error: Argument 2 to "isin" has incompatible type "Union[Sequence[Any], - # Mapping[Any, Any]]"; expected "Union[Union[ExtensionArray, - # ndarray[Any, Any]], Index, Series]" - res_values = algorithms.isin( - self.values.ravel(), - values, # type: ignore[arg-type] - ) + + def isin_(x): + # error: Argument 2 to "isin" has incompatible type "Union[Series, + # DataFrame, Sequence[Any], Mapping[Any, Any]]"; expected + # "Union[Union[Union[ExtensionArray, ndarray[Any, Any]], Index, + # Series], List[Any], range]" + result = algorithms.isin( + x.ravel(), + values, # type: ignore[arg-type] + ) + return result.reshape(x.shape) + + res_values = self._mgr.apply(isin_) result = self._constructor( - res_values.reshape(self.shape), + res_values, self.index, self.columns, copy=False, diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py index e924963f588f3..b4511aad27a93 100644 --- a/pandas/tests/frame/methods/test_isin.py +++ b/pandas/tests/frame/methods/test_isin.py @@ -217,3 +217,11 @@ def test_isin_read_only(self): result = df.isin(arr) expected = DataFrame([True, True, True]) tm.assert_frame_equal(result, expected) + + def test_isin_not_lossy(self): + # GH 53514 + val = 1666880195890293744 + df = DataFrame({"a": [val], "b": [1.0]}) + result = df.isin([val]) + expected = DataFrame({"a": [True], "b": [False]}) + tm.assert_frame_equal(result, expected)