Skip to content

Commit 8f78bb7

Browse files
jbrockmendel authored and jreback committed
DEPR: get_duplicates (#30004)
1 parent acbba97 commit 8f78bb7

File tree

8 files changed

+3
-108
lines changed

8 files changed

+3
-108
lines changed

doc/redirects.csv

-1
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,6 @@ generated/pandas.Index.equals,../reference/api/pandas.Index.equals
636636
generated/pandas.Index.factorize,../reference/api/pandas.Index.factorize
637637
generated/pandas.Index.fillna,../reference/api/pandas.Index.fillna
638638
generated/pandas.Index.format,../reference/api/pandas.Index.format
639-
generated/pandas.Index.get_duplicates,../reference/api/pandas.Index.get_duplicates
640639
generated/pandas.Index.get_indexer_for,../reference/api/pandas.Index.get_indexer_for
641640
generated/pandas.Index.get_indexer,../reference/api/pandas.Index.get_indexer
642641
generated/pandas.Index.get_indexer_non_unique,../reference/api/pandas.Index.get_indexer_non_unique

doc/source/reference/indexing.rst

-1
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,6 @@ Selecting
152152
Index.asof
153153
Index.asof_locs
154154
Index.contains
155-
Index.get_duplicates
156155
Index.get_indexer
157156
Index.get_indexer_for
158157
Index.get_indexer_non_unique

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
548548
- Removed the previously deprecated properties :attr:`DataFrame.is_copy`, :attr:`Series.is_copy` (:issue:`18812`)
549549
- Removed the previously deprecated :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`)
550550
- Removed the previously deprecated :meth:`DataFrame.ftypes`, :meth:`Series.ftypes`, :meth:`Series.ftype` (:issue:`26744`)
551-
- Removed the previously deprecated :meth:`Index.get_duplicated`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
551+
- Removed the previously deprecated :meth:`Index.get_duplicates`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
552552
- Removed the previously deprecated :meth:`Series.clip_upper`, :meth:`Series.clip_lower`, :meth:`DataFrame.clip_upper`, :meth:`DataFrame.clip_lower` (:issue:`24203`)
553553
- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`)
554554
- Removed the previously deprecated :attr:`DatetimeIndex.offset` (:issue:`20730`)

pandas/core/indexes/base.py

-62
Original file line numberDiff line numberDiff line change
@@ -2138,68 +2138,6 @@ def duplicated(self, keep="first"):
21382138
"""
21392139
return super().duplicated(keep=keep)
21402140

2141-
def get_duplicates(self):
2142-
"""
2143-
Extract duplicated index elements.
2144-
2145-
.. deprecated:: 0.23.0
2146-
Use idx[idx.duplicated()].unique() instead
2147-
2148-
Returns a sorted list of index elements which appear more than once in
2149-
the index.
2150-
2151-
Returns
2152-
-------
2153-
array-like
2154-
List of duplicated indexes.
2155-
2156-
See Also
2157-
--------
2158-
Index.duplicated : Return boolean array denoting duplicates.
2159-
Index.drop_duplicates : Return Index with duplicates removed.
2160-
2161-
Examples
2162-
--------
2163-
2164-
Works on different types of Index.
2165-
2166-
>>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() # doctest: +SKIP
2167-
[2, 3]
2168-
2169-
Note that for a DatetimeIndex, it does not return a list but a new
2170-
DatetimeIndex:
2171-
2172-
>>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03',
2173-
... '2018-01-03', '2018-01-04', '2018-01-04'],
2174-
... format='%Y-%m-%d')
2175-
>>> pd.Index(dates).get_duplicates() # doctest: +SKIP
2176-
DatetimeIndex(['2018-01-03', '2018-01-04'],
2177-
dtype='datetime64[ns]', freq=None)
2178-
2179-
Sorts duplicated elements even when indexes are unordered.
2180-
2181-
>>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() # doctest: +SKIP
2182-
[2, 3]
2183-
2184-
Return empty array-like structure when all elements are unique.
2185-
2186-
>>> pd.Index([1, 2, 3, 4]).get_duplicates() # doctest: +SKIP
2187-
[]
2188-
>>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'],
2189-
... format='%Y-%m-%d')
2190-
>>> pd.Index(dates).get_duplicates() # doctest: +SKIP
2191-
DatetimeIndex([], dtype='datetime64[ns]', freq=None)
2192-
"""
2193-
warnings.warn(
2194-
"'get_duplicates' is deprecated and will be removed in "
2195-
"a future release. You can use "
2196-
"idx[idx.duplicated()].unique() instead",
2197-
FutureWarning,
2198-
stacklevel=2,
2199-
)
2200-
2201-
return self[self.duplicated()].unique()
2202-
22032141
def _get_unique_index(self, dropna=False):
22042142
"""
22052143
Returns an index containing unique values.

pandas/tests/indexes/datetimes/test_datetime.py

-19
Original file line numberDiff line numberDiff line change
@@ -188,25 +188,6 @@ def test_string_index_series_name_converted(self):
188188
result = df.T["1/3/2000"]
189189
assert result.name == df.index[2]
190190

191-
def test_get_duplicates(self):
192-
idx = DatetimeIndex(
193-
[
194-
"2000-01-01",
195-
"2000-01-02",
196-
"2000-01-02",
197-
"2000-01-03",
198-
"2000-01-03",
199-
"2000-01-04",
200-
]
201-
)
202-
203-
with tm.assert_produces_warning(FutureWarning):
204-
# Deprecated - see GH20239
205-
result = idx.get_duplicates()
206-
207-
ex = DatetimeIndex(["2000-01-02", "2000-01-03"])
208-
tm.assert_index_equal(result, ex)
209-
210191
def test_argmin_argmax(self):
211192
idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
212193
assert idx.argmin() == 1

pandas/tests/indexes/multi/test_duplicates.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -251,16 +251,13 @@ def test_duplicated_large(keep):
251251
tm.assert_numpy_array_equal(result, expected)
252252

253253

254-
def test_get_duplicates():
254+
def test_duplicated2():
255+
# TODO: more informative test name
255256
# GH5873
256257
for a in [101, 102]:
257258
mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
258259
assert not mi.has_duplicates
259260

260-
with tm.assert_produces_warning(FutureWarning):
261-
# Deprecated - see GH20239
262-
assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []]))
263-
264261
tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))
265262

266263
for n in range(1, 6): # 1st level shape
@@ -274,10 +271,6 @@ def test_get_duplicates():
274271
assert len(mi) == (n + 1) * (m + 1)
275272
assert not mi.has_duplicates
276273

277-
with tm.assert_produces_warning(FutureWarning):
278-
# Deprecated - see GH20239
279-
assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []]))
280-
281274
tm.assert_numpy_array_equal(
282275
mi.duplicated(), np.zeros(len(mi), dtype="bool")
283276
)

pandas/tests/indexes/test_base.py

-5
Original file line numberDiff line numberDiff line change
@@ -2394,11 +2394,6 @@ def test_cached_properties_not_settable(self):
23942394
with pytest.raises(AttributeError, match="Can't set attribute"):
23952395
index.is_unique = False
23962396

2397-
def test_get_duplicates_deprecated(self):
2398-
index = pd.Index([1, 2, 3])
2399-
with tm.assert_produces_warning(FutureWarning):
2400-
index.get_duplicates()
2401-
24022397
def test_tab_complete_warning(self, ip):
24032398
# https://github.com/pandas-dev/pandas/issues/16409
24042399
pytest.importorskip("IPython", minversion="6.0.0")

pandas/tests/indexes/timedeltas/test_timedelta.py

-10
Original file line numberDiff line numberDiff line change
@@ -179,16 +179,6 @@ def test_sort_values(self):
179179

180180
tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1]), check_dtype=False)
181181

182-
def test_get_duplicates(self):
183-
idx = TimedeltaIndex(["1 day", "2 day", "2 day", "3 day", "3day", "4day"])
184-
185-
with tm.assert_produces_warning(FutureWarning):
186-
# Deprecated - see GH20239
187-
result = idx.get_duplicates()
188-
189-
ex = TimedeltaIndex(["2 day", "3day"])
190-
tm.assert_index_equal(result, ex)
191-
192182
def test_argmin_argmax(self):
193183
idx = TimedeltaIndex(["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"])
194184
assert idx.argmin() == 1

0 commit comments

Comments
 (0)