Skip to content

Commit d95a7a7

Browse files
authored
DEPR: casting in datetimelike isin (#56427)
* DEPR: casting in datetimelike isin
* GH ref
* update doctest
1 parent 2dcb963 commit d95a7a7

File tree

5 files changed

+67
-14
lines changed

5 files changed

+67
-14
lines changed

doc/source/whatsnew/v2.2.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ Other Deprecations
455455
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_xml` except ``path_or_buffer``. (:issue:`54229`)
456456
- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`)
457457
- Deprecated behavior of :meth:`Index.insert` with an object-dtype index silently performing type inference on the result, explicitly call ``result.infer_objects(copy=False)`` for the old behavior instead (:issue:`51363`)
458+
- Deprecated casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`)
458459
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
459460
- Deprecated including the groups in computations when using :meth:`.DataFrameGroupBy.apply` and :meth:`.DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
460461
- Deprecated indexing an :class:`Index` with a boolean indexer of length zero (:issue:`55820`)
@@ -526,6 +527,7 @@ Datetimelike
526527
^^^^^^^^^^^^
527528
- Bug in :class:`DatetimeIndex` construction when passing both a ``tz`` and either ``dayfirst`` or ``yearfirst`` ignoring dayfirst/yearfirst (:issue:`55813`)
528529
- Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`)
530+
- Bug in :meth:`Series.isin` with :class:`DatetimeTZDtype` dtype and comparison values that are all ``NaT`` incorrectly returning all-``False`` even if the series contains ``NaT`` entries (:issue:`56427`)
529531
- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`)
530532
- Bug in :func:`testing.assert_extension_array_equal` that could use the wrong unit when comparing resolutions (:issue:`55730`)
531533
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`)

pandas/_libs/lib.pyx

+5-2
Original file line numberDiff line numberDiff line change
@@ -2756,8 +2756,11 @@ def maybe_convert_objects(ndarray[object] objects,
27562756
res[:] = NPY_NAT
27572757
return res
27582758
elif dtype is not None:
2759-
# EA, we don't expect to get here, but _could_ implement
2760-
raise NotImplementedError(dtype)
2759+
# i.e. PeriodDtype, DatetimeTZDtype
2760+
cls = dtype.construct_array_type()
2761+
obj = cls._from_sequence([], dtype=dtype)
2762+
taker = -np.ones((<object>objects).shape, dtype=np.intp)
2763+
return obj.take(taker, allow_fill=True)
27612764
else:
27622765
# we don't guess
27632766
seen.object_ = True

pandas/core/arrays/datetimelike.py

+21
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,8 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
751751
# TODO: de-duplicate with equals, validate_comparison_value
752752
return np.zeros(self.shape, dtype=bool)
753753

754+
values = ensure_wrapped_if_datetimelike(values)
755+
754756
if not isinstance(values, type(self)):
755757
inferable = [
756758
"timedelta",
@@ -761,6 +763,14 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
761763
"period",
762764
]
763765
if values.dtype == object:
766+
values = lib.maybe_convert_objects(
767+
values,
768+
convert_non_numeric=True,
769+
dtype_if_all_nat=self.dtype,
770+
)
771+
if values.dtype != object:
772+
return self.isin(values)
773+
764774
inferred = lib.infer_dtype(values, skipna=False)
765775
if inferred not in inferable:
766776
if inferred == "string":
@@ -775,6 +785,17 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
775785
values = type(self)._from_sequence(values)
776786
except ValueError:
777787
return isin(self.astype(object), values)
788+
else:
789+
warnings.warn(
790+
# GH#53111
791+
f"The behavior of 'isin' with dtype={self.dtype} and "
792+
"castable values (e.g. strings) is deprecated. In a "
793+
"future version, these will not be considered matching "
794+
"by isin. Explicitly cast to the appropriate dtype before "
795+
"calling isin instead.",
796+
FutureWarning,
797+
stacklevel=find_stack_level(),
798+
)
778799

779800
if self.dtype.kind in "mM":
780801
self = cast("DatetimeArray | TimedeltaArray", self)

pandas/core/indexes/base.py

-12
Original file line numberDiff line numberDiff line change
@@ -6534,18 +6534,6 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
65346534
65356535
>>> midx.isin([(1, 'red'), (3, 'red')])
65366536
array([ True, False, False])
6537-
6538-
For a DatetimeIndex, string values in `values` are converted to
6539-
Timestamps.
6540-
6541-
>>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
6542-
>>> dti = pd.to_datetime(dates)
6543-
>>> dti
6544-
DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
6545-
dtype='datetime64[ns]', freq=None)
6546-
6547-
>>> dti.isin(['2000-03-11'])
6548-
array([ True, False, False])
65496537
"""
65506538
if level is not None:
65516539
self._validate_index_level(level)

pandas/tests/test_algos.py

+39
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,45 @@ def test_large(self):
992992
expected[1] = True
993993
tm.assert_numpy_array_equal(result, expected)
994994

995+
@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]", "period[D]"])
996+
def test_isin_datetimelike_all_nat(self, dtype):
997+
# GH#56427
998+
dta = date_range("2013-01-01", periods=3)._values
999+
arr = Series(dta.view("i8")).array.view(dtype)
1000+
1001+
arr[0] = NaT
1002+
result = algos.isin(arr, [NaT])
1003+
expected = np.array([True, False, False], dtype=bool)
1004+
tm.assert_numpy_array_equal(result, expected)
1005+
1006+
@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]"])
1007+
def test_isin_datetimelike_strings_deprecated(self, dtype):
1008+
# GH#53111
1009+
dta = date_range("2013-01-01", periods=3)._values
1010+
arr = Series(dta.view("i8")).array.view(dtype)
1011+
1012+
vals = [str(x) for x in arr]
1013+
msg = "The behavior of 'isin' with dtype=.* is deprecated"
1014+
with tm.assert_produces_warning(FutureWarning, match=msg):
1015+
res = algos.isin(arr, vals)
1016+
assert res.all()
1017+
1018+
vals2 = np.array(vals, dtype=str)
1019+
with tm.assert_produces_warning(FutureWarning, match=msg):
1020+
res2 = algos.isin(arr, vals2)
1021+
assert res2.all()
1022+
1023+
def test_isin_dt64tz_with_nat(self):
1024+
# the all-NaT values used to get inferred to tznaive, which was evaluated
1025+
# as non-matching GH#56427
1026+
dti = date_range("2016-01-01", periods=3, tz="UTC")
1027+
ser = Series(dti)
1028+
ser[0] = NaT
1029+
1030+
res = algos.isin(ser._values, [NaT])
1031+
exp = np.array([True, False, False], dtype=bool)
1032+
tm.assert_numpy_array_equal(res, exp)
1033+
9951034
def test_categorical_from_codes(self):
9961035
# GH 16639
9971036
vals = np.array([0, 1, 2, 0])

0 commit comments

Comments
 (0)