Skip to content

Commit c7b998e

Browse files
authored
DEPR: allowing non-standard types in unique, factorize, isin (#58058)
1 parent 172c6bc commit c7b998e

File tree

4 files changed

+50
-66
lines changed

4 files changed

+50
-66
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ Removal of prior version deprecations/changes
206206
- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
207207
- All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
208208
- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
209+
- Disallow passing anything other than a ``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series` to :func:`isin`, :func:`unique`, and :func:`factorize` (:issue:`52986`)
209210
- Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`)
210211
- Disallow units other than "s", "ms", "us", "ns" for datetime64 and timedelta64 dtypes in :func:`array` (:issue:`53817`)
211212
- Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`)

pandas/core/algorithms.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -226,12 +226,9 @@ def _ensure_arraylike(values, func_name: str) -> ArrayLike:
226226
# GH#52986
227227
if func_name != "isin-targets":
228228
# Make an exception for the comps argument in isin.
229-
warnings.warn(
230-
f"{func_name} with argument that is not not a Series, Index, "
231-
"ExtensionArray, or np.ndarray is deprecated and will raise in a "
232-
"future version.",
233-
FutureWarning,
234-
stacklevel=find_stack_level(),
229+
raise TypeError(
230+
f"{func_name} requires a Series, Index, "
231+
f"ExtensionArray, or np.ndarray, got {type(values).__name__}."
235232
)
236233

237234
inferred = lib.infer_dtype(values, skipna=False)

pandas/tests/libs/test_hashtable.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -730,12 +730,11 @@ def test_mode(self, dtype):
730730

731731
def test_ismember_tuple_with_nans():
732732
# GH-41836
733-
values = [("a", float("nan")), ("b", 1)]
733+
values = np.empty(2, dtype=object)
734+
values[:] = [("a", float("nan")), ("b", 1)]
734735
comps = [("a", float("nan"))]
735736

736-
msg = "isin with argument that is not not a Series"
737-
with tm.assert_produces_warning(FutureWarning, match=msg):
738-
result = isin(values, comps)
737+
result = isin(values, comps)
739738
expected = np.array([True, False], dtype=np.bool_)
740739
tm.assert_numpy_array_equal(result, expected)
741740

pandas/tests/test_algos.py

+43-56
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,13 @@
5454
class TestFactorize:
5555
def test_factorize_complex(self):
5656
# GH#17927
57-
array = [1, 2, 2 + 1j]
58-
msg = "factorize with argument that is not not a Series"
59-
with tm.assert_produces_warning(FutureWarning, match=msg):
60-
labels, uniques = algos.factorize(array)
57+
array = np.array([1, 2, 2 + 1j], dtype=complex)
58+
labels, uniques = algos.factorize(array)
6159

6260
expected_labels = np.array([0, 1, 2], dtype=np.intp)
6361
tm.assert_numpy_array_equal(labels, expected_labels)
6462

65-
# Should return a complex dtype in the future
66-
expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object)
63+
expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=complex)
6764
tm.assert_numpy_array_equal(uniques, expected_uniques)
6865

6966
def test_factorize(self, index_or_series_obj, sort):
@@ -265,9 +262,8 @@ def test_factorizer_object_with_nan(self):
265262
)
266263
def test_factorize_tuple_list(self, data, expected_codes, expected_uniques):
267264
# GH9454
268-
msg = "factorize with argument that is not not a Series"
269-
with tm.assert_produces_warning(FutureWarning, match=msg):
270-
codes, uniques = pd.factorize(data)
265+
data = com.asarray_tuplesafe(data, dtype=object)
266+
codes, uniques = pd.factorize(data)
271267

272268
tm.assert_numpy_array_equal(codes, np.array(expected_codes, dtype=np.intp))
273269

@@ -488,12 +484,12 @@ def test_object_factorize_use_na_sentinel_false(
488484
"data, expected_codes, expected_uniques",
489485
[
490486
(
491-
[1, None, 1, 2],
487+
np.array([1, None, 1, 2], dtype=object),
492488
np.array([0, 1, 0, 2], dtype=np.dtype("intp")),
493489
np.array([1, np.nan, 2], dtype="O"),
494490
),
495491
(
496-
[1, np.nan, 1, 2],
492+
np.array([1, np.nan, 1, 2], dtype=np.float64),
497493
np.array([0, 1, 0, 2], dtype=np.dtype("intp")),
498494
np.array([1, np.nan, 2], dtype=np.float64),
499495
),
@@ -502,9 +498,7 @@ def test_object_factorize_use_na_sentinel_false(
502498
def test_int_factorize_use_na_sentinel_false(
503499
self, data, expected_codes, expected_uniques
504500
):
505-
msg = "factorize with argument that is not not a Series"
506-
with tm.assert_produces_warning(FutureWarning, match=msg):
507-
codes, uniques = algos.factorize(data, use_na_sentinel=False)
501+
codes, uniques = algos.factorize(data, use_na_sentinel=False)
508502

509503
tm.assert_numpy_array_equal(uniques, expected_uniques, strict_nan=True)
510504
tm.assert_numpy_array_equal(codes, expected_codes, strict_nan=True)
@@ -777,9 +771,8 @@ def test_order_of_appearance(self):
777771
result = pd.unique(Series([2] + [1] * 5))
778772
tm.assert_numpy_array_equal(result, np.array([2, 1], dtype="int64"))
779773

780-
msg = "unique with argument that is not not a Series, Index,"
781-
with tm.assert_produces_warning(FutureWarning, match=msg):
782-
result = pd.unique(list("aabc"))
774+
data = np.array(["a", "a", "b", "c"], dtype=object)
775+
result = pd.unique(data)
783776
expected = np.array(["a", "b", "c"], dtype=object)
784777
tm.assert_numpy_array_equal(result, expected)
785778

@@ -815,9 +808,8 @@ def test_order_of_appearance_dt64tz(self, unit):
815808
)
816809
def test_tuple_with_strings(self, arg, expected):
817810
# see GH 17108
818-
msg = "unique with argument that is not not a Series"
819-
with tm.assert_produces_warning(FutureWarning, match=msg):
820-
result = pd.unique(arg)
811+
arg = com.asarray_tuplesafe(arg, dtype=object)
812+
result = pd.unique(arg)
821813
tm.assert_numpy_array_equal(result, expected)
822814

823815
def test_obj_none_preservation(self):
@@ -904,12 +896,6 @@ def test_invalid(self):
904896
algos.isin([1], 1)
905897

906898
def test_basic(self):
907-
msg = "isin with argument that is not not a Series"
908-
with tm.assert_produces_warning(FutureWarning, match=msg):
909-
result = algos.isin([1, 2], [1])
910-
expected = np.array([True, False])
911-
tm.assert_numpy_array_equal(result, expected)
912-
913899
result = algos.isin(np.array([1, 2]), [1])
914900
expected = np.array([True, False])
915901
tm.assert_numpy_array_equal(result, expected)
@@ -926,21 +912,20 @@ def test_basic(self):
926912
expected = np.array([True, False])
927913
tm.assert_numpy_array_equal(result, expected)
928914

929-
with tm.assert_produces_warning(FutureWarning, match=msg):
930-
result = algos.isin(["a", "b"], ["a"])
915+
arg = np.array(["a", "b"], dtype=object)
916+
result = algos.isin(arg, ["a"])
931917
expected = np.array([True, False])
932918
tm.assert_numpy_array_equal(result, expected)
933919

934-
result = algos.isin(Series(["a", "b"]), Series(["a"]))
920+
result = algos.isin(Series(arg), Series(["a"]))
935921
expected = np.array([True, False])
936922
tm.assert_numpy_array_equal(result, expected)
937923

938-
result = algos.isin(Series(["a", "b"]), {"a"})
924+
result = algos.isin(Series(arg), {"a"})
939925
expected = np.array([True, False])
940926
tm.assert_numpy_array_equal(result, expected)
941927

942-
with tm.assert_produces_warning(FutureWarning, match=msg):
943-
result = algos.isin(["a", "b"], [1])
928+
result = algos.isin(arg, [1])
944929
expected = np.array([False, False])
945930
tm.assert_numpy_array_equal(result, expected)
946931

@@ -1058,12 +1043,10 @@ def test_same_nan_is_in(self):
10581043
# at least, isin() should follow python's "np.nan in [nan] == True"
10591044
# casting to -> np.float64 -> another float-object somewhere on
10601045
# the way could jeopardize this behavior
1061-
comps = [np.nan] # could be casted to float64
1046+
comps = np.array([np.nan], dtype=object) # could be casted to float64
10621047
values = [np.nan]
10631048
expected = np.array([True])
1064-
msg = "isin with argument that is not not a Series"
1065-
with tm.assert_produces_warning(FutureWarning, match=msg):
1066-
result = algos.isin(comps, values)
1049+
result = algos.isin(comps, values)
10671050
tm.assert_numpy_array_equal(expected, result)
10681051

10691052
def test_same_nan_is_in_large(self):
@@ -1098,12 +1081,12 @@ def __hash__(self):
10981081

10991082
a, b = LikeNan(), LikeNan()
11001083

1101-
msg = "isin with argument that is not not a Series"
1102-
with tm.assert_produces_warning(FutureWarning, match=msg):
1103-
# same object -> True
1104-
tm.assert_numpy_array_equal(algos.isin([a], [a]), np.array([True]))
1105-
# different objects -> False
1106-
tm.assert_numpy_array_equal(algos.isin([a], [b]), np.array([False]))
1084+
arg = np.array([a], dtype=object)
1085+
1086+
# same object -> True
1087+
tm.assert_numpy_array_equal(algos.isin(arg, [a]), np.array([True]))
1088+
# different objects -> False
1089+
tm.assert_numpy_array_equal(algos.isin(arg, [b]), np.array([False]))
11071090

11081091
def test_different_nans(self):
11091092
# GH 22160
@@ -1132,12 +1115,11 @@ def test_different_nans(self):
11321115
def test_no_cast(self):
11331116
# GH 22160
11341117
# ensure 42 is not cast to a string
1135-
comps = ["ss", 42]
1118+
comps = np.array(["ss", 42], dtype=object)
11361119
values = ["42"]
11371120
expected = np.array([False, False])
1138-
msg = "isin with argument that is not not a Series, Index"
1139-
with tm.assert_produces_warning(FutureWarning, match=msg):
1140-
result = algos.isin(comps, values)
1121+
1122+
result = algos.isin(comps, values)
11411123
tm.assert_numpy_array_equal(expected, result)
11421124

11431125
@pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
@@ -1658,27 +1640,32 @@ def test_unique_tuples(self, arr, uniques):
16581640
expected = np.empty(len(uniques), dtype=object)
16591641
expected[:] = uniques
16601642

1661-
msg = "unique with argument that is not not a Series"
1662-
with tm.assert_produces_warning(FutureWarning, match=msg):
1663-
result = pd.unique(arr)
1664-
tm.assert_numpy_array_equal(result, expected)
1643+
msg = "unique requires a Series, Index, ExtensionArray, or np.ndarray, got list"
1644+
with pytest.raises(TypeError, match=msg):
1645+
# GH#52986
1646+
pd.unique(arr)
1647+
1648+
res = pd.unique(com.asarray_tuplesafe(arr, dtype=object))
1649+
tm.assert_numpy_array_equal(res, expected)
16651650

16661651
@pytest.mark.parametrize(
16671652
"array,expected",
16681653
[
16691654
(
16701655
[1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j],
1671-
# Should return a complex dtype in the future
1672-
np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=object),
1656+
np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=complex),
16731657
)
16741658
],
16751659
)
16761660
def test_unique_complex_numbers(self, array, expected):
16771661
# GH 17927
1678-
msg = "unique with argument that is not not a Series"
1679-
with tm.assert_produces_warning(FutureWarning, match=msg):
1680-
result = pd.unique(array)
1681-
tm.assert_numpy_array_equal(result, expected)
1662+
msg = "unique requires a Series, Index, ExtensionArray, or np.ndarray, got list"
1663+
with pytest.raises(TypeError, match=msg):
1664+
# GH#52986
1665+
pd.unique(array)
1666+
1667+
res = pd.unique(np.array(array))
1668+
tm.assert_numpy_array_equal(res, expected)
16821669

16831670

16841671
class TestHashTable:

0 commit comments

Comments
 (0)