Skip to content

Commit c5490cf

Browse files
jmholzer authored and MarcoGorelli committed
Deprecate non-keyword arguments for drop_duplicates. (pandas-dev#41500)
* ENH: Deprecate non-keyword arguments for drop_duplicates. * leave newline * ENH: Deprecate non-keyword arguments for drop_duplicates. * ENH: Deprecate non-keyword arguments for drop_duplicates. * ENH: Deprecate non-keyword arguments for drop_duplicates. * ENH: Deprecate non-keyword arguments for drop_duplicates. * ENH: Deprecate non-keyword arguments for drop_duplicates. * ENH: Deprecate non-keyword arguments for drop_duplicates. * ENH: Deprecate non-keyword arguments for drop_duplicates. * remove redundant line * ENH: Deprecate non-keyword arguments for drop_duplicates. Co-authored-by: Marco Gorelli <[email protected]>
1 parent 3bba8b2 commit c5490cf

File tree

9 files changed

+64
-1
lines changed

9 files changed

+64
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,7 @@ Deprecations
680680
- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
681681
- Deprecated special treatment of lists with first element a Categorical in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`)
682682
- Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`)
683+
- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` (except for ``subset``), :meth:`Series.drop_duplicates`, :meth:`Index.drop_duplicates` and :meth:`MultiIndex.drop_duplicates` (:issue:`41485`)
683684
- Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`)
684685
- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`)
685686

pandas/core/frame.py

+1
Original file line numberDiff line numberDiff line change
@@ -6007,6 +6007,7 @@ def dropna(
60076007
else:
60086008
return result
60096009

6010+
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"])
60106011
def drop_duplicates(
60116012
self,
60126013
subset: Hashable | Sequence[Hashable] | None = None,

pandas/core/indexes/base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
from pandas.util._decorators import (
5555
Appender,
5656
cache_readonly,
57+
deprecate_nonkeyword_arguments,
5758
doc,
5859
)
5960

@@ -2651,7 +2652,7 @@ def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT:
26512652
result = super().unique()
26522653
return self._shallow_copy(result)
26532654

2654-
@final
2655+
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
26552656
def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT:
26562657
"""
26572658
Return Index with duplicate values removed.

pandas/core/indexes/multi.py

+5
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from pandas.util._decorators import (
4242
Appender,
4343
cache_readonly,
44+
deprecate_nonkeyword_arguments,
4445
doc,
4546
)
4647

@@ -3775,6 +3776,10 @@ def isin(self, values, level=None) -> np.ndarray:
37753776
return np.zeros(len(levs), dtype=np.bool_)
37763777
return levs.isin(values)
37773778

3779+
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
3780+
def drop_duplicates(self, keep: str | bool = "first") -> MultiIndex:
3781+
return super().drop_duplicates(keep=keep)
3782+
37783783
# ---------------------------------------------------------------
37793784
# Arithmetic/Numeric Methods - Disabled
37803785

pandas/core/series.py

+1
Original file line numberDiff line numberDiff line change
@@ -2057,6 +2057,7 @@ def drop_duplicates(self, *, inplace: Literal[True]) -> None:
20572057
def drop_duplicates(self, keep=..., inplace: bool = ...) -> Series | None:
20582058
...
20592059

2060+
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
20602061
def drop_duplicates(self, keep="first", inplace=False) -> Series | None:
20612062
"""
20622063
Return Series with duplicate values removed.

pandas/tests/frame/methods/test_drop_duplicates.py

+14
Original file line numberDiff line numberDiff line change
@@ -471,3 +471,17 @@ def test_drop_duplicates_non_boolean_ignore_index(arg):
471471
msg = '^For argument "ignore_index" expected type bool, received type .*.$'
472472
with pytest.raises(ValueError, match=msg):
473473
df.drop_duplicates(ignore_index=arg)
474+
475+
476+
def test_drop_duplicates_pos_args_deprecation():
477+
# GH#41485
478+
df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]})
479+
msg = (
480+
"In a future version of pandas all arguments of "
481+
"DataFrame.drop_duplicates except for the argument 'subset' "
482+
"will be keyword-only"
483+
)
484+
with tm.assert_produces_warning(FutureWarning, match=msg):
485+
result = df.drop_duplicates(["b", "c"], "last")
486+
expected = DataFrame({"a": [1, 2], "b": [1, 3], "c": [1, 3]}, index=[1, 2])
487+
tm.assert_frame_equal(expected, result)

pandas/tests/indexes/multi/test_duplicates.py

+13
Original file line numberDiff line numberDiff line change
@@ -306,3 +306,16 @@ def test_duplicated_drop_duplicates():
306306
assert duplicated.dtype == bool
307307
expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2]))
308308
tm.assert_index_equal(idx.drop_duplicates(keep=False), expected)
309+
310+
311+
def test_multi_drop_duplicates_pos_args_deprecation():
312+
# GH#41485
313+
idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]])
314+
msg = (
315+
"In a future version of pandas all arguments of "
316+
"MultiIndex.drop_duplicates will be keyword-only"
317+
)
318+
with tm.assert_produces_warning(FutureWarning, match=msg):
319+
result = idx.drop_duplicates("last")
320+
expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]])
321+
tm.assert_index_equal(expected, result)

pandas/tests/indexes/test_base.py

+14
Original file line numberDiff line numberDiff line change
@@ -1738,3 +1738,17 @@ def test_construct_from_memoryview(klass, extra_kwargs):
17381738
result = klass(memoryview(np.arange(2000, 2005)), **extra_kwargs)
17391739
expected = klass(range(2000, 2005), **extra_kwargs)
17401740
tm.assert_index_equal(result, expected)
1741+
1742+
1743+
def test_drop_duplicates_pos_args_deprecation():
1744+
# GH#41485
1745+
idx = Index([1, 2, 3, 1])
1746+
msg = (
1747+
"In a future version of pandas all arguments of "
1748+
"Index.drop_duplicates will be keyword-only"
1749+
)
1750+
with tm.assert_produces_warning(FutureWarning, match=msg):
1751+
idx.drop_duplicates("last")
1752+
result = idx.drop_duplicates("last")
1753+
expected = Index([2, 3, 1])
1754+
tm.assert_index_equal(expected, result)

pandas/tests/series/methods/test_drop_duplicates.py

+13
Original file line numberDiff line numberDiff line change
@@ -223,3 +223,16 @@ def test_drop_duplicates_categorical_bool(self, ordered):
223223
return_value = sc.drop_duplicates(keep=False, inplace=True)
224224
assert return_value is None
225225
tm.assert_series_equal(sc, tc[~expected])
226+
227+
228+
def test_drop_duplicates_pos_args_deprecation():
229+
# GH#41485
230+
s = Series(["a", "b", "c", "b"])
231+
msg = (
232+
"In a future version of pandas all arguments of "
233+
"Series.drop_duplicates will be keyword-only"
234+
)
235+
with tm.assert_produces_warning(FutureWarning, match=msg):
236+
result = s.drop_duplicates("last")
237+
expected = Series(["a", "c", "b"], index=[0, 2, 3])
238+
tm.assert_series_equal(expected, result)

0 commit comments

Comments
 (0)