Skip to content

DEPR: get_duplicates #30004

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/redirects.csv
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,6 @@ generated/pandas.Index.equals,../reference/api/pandas.Index.equals
generated/pandas.Index.factorize,../reference/api/pandas.Index.factorize
generated/pandas.Index.fillna,../reference/api/pandas.Index.fillna
generated/pandas.Index.format,../reference/api/pandas.Index.format
generated/pandas.Index.get_duplicates,../reference/api/pandas.Index.get_duplicates
generated/pandas.Index.get_indexer_for,../reference/api/pandas.Index.get_indexer_for
generated/pandas.Index.get_indexer,../reference/api/pandas.Index.get_indexer
generated/pandas.Index.get_indexer_non_unique,../reference/api/pandas.Index.get_indexer_non_unique
Expand Down
1 change: 0 additions & 1 deletion doc/source/reference/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ Selecting
Index.asof
Index.asof_locs
Index.contains
Index.get_duplicates
Index.get_indexer
Index.get_indexer_for
Index.get_indexer_non_unique
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed the previously deprecated properties :attr:`DataFrame.is_copy`, :attr:`Series.is_copy` (:issue:`18812`)
- Removed the previously deprecated :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`)
- Removed the previously deprecated :meth:`DataFrame.ftypes`, :meth:`Series.ftypes`, :meth:`Series.ftype` (:issue:`26744`)
- Removed the previously deprecated :meth:`Index.get_duplicated`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
- Removed the previously deprecated :meth:`Index.get_duplicates`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
- Removed the previously deprecated :meth:`Series.clip_upper`, :meth:`Series.clip_lower`, :meth:`DataFrame.clip_upper`, :meth:`DataFrame.clip_lower` (:issue:`24203`)
- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`)
- Removed the previously deprecated :attr:`DatetimeIndex.offset` (:issue:`20730`)
Expand Down
62 changes: 0 additions & 62 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2138,68 +2138,6 @@ def duplicated(self, keep="first"):
"""
return super().duplicated(keep=keep)

def get_duplicates(self):
"""
Extract duplicated index elements.

.. deprecated:: 0.23.0
Use idx[idx.duplicated()].unique() instead

Returns a sorted list of index elements which appear more than once in
the index.

Returns
-------
array-like
List of duplicated indexes.

Warns
-----
FutureWarning
Emitted on every call, because this method is deprecated.

See Also
--------
Index.duplicated : Return boolean array denoting duplicates.
Index.drop_duplicates : Return Index with duplicates removed.

Notes
-----
The result is computed as ``self[self.duplicated()].unique()``. No
explicit sort or conversion to a list happens in this method, so the
"sorted list" wording above and the list outputs in the skipped
examples below may be outdated; verify before relying on them.

Examples
--------

Works on different types of Index.

>>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() # doctest: +SKIP
[2, 3]

Note that for a DatetimeIndex, it does not return a list but a new
DatetimeIndex:

>>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03',
... '2018-01-03', '2018-01-04', '2018-01-04'],
... format='%Y-%m-%d')
>>> pd.Index(dates).get_duplicates() # doctest: +SKIP
DatetimeIndex(['2018-01-03', '2018-01-04'],
dtype='datetime64[ns]', freq=None)

Sorts duplicated elements even when indexes are unordered.

>>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() # doctest: +SKIP
[2, 3]

Return empty array-like structure when all elements are unique.

>>> pd.Index([1, 2, 3, 4]).get_duplicates() # doctest: +SKIP
[]
>>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'],
... format='%Y-%m-%d')
>>> pd.Index(dates).get_duplicates() # doctest: +SKIP
DatetimeIndex([], dtype='datetime64[ns]', freq=None)
"""
# stacklevel=2 attributes the warning to the caller's line rather than
# to this method.
warnings.warn(
"'get_duplicates' is deprecated and will be removed in "
"a future release. You can use "
"idx[idx.duplicated()].unique() instead",
FutureWarning,
stacklevel=2,
)

# Select the entries flagged by ``duplicated()``, then collapse repeats
# among them with ``unique()``.
return self[self.duplicated()].unique()

def _get_unique_index(self, dropna=False):
"""
Returns an index containing unique values.
Expand Down
19 changes: 0 additions & 19 deletions pandas/tests/indexes/datetimes/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,25 +188,6 @@ def test_string_index_series_name_converted(self):
result = df.T["1/3/2000"]
assert result.name == df.index[2]

def test_get_duplicates(self):
# Six entries in which "2000-01-02" and "2000-01-03" each appear twice.
idx = DatetimeIndex(
[
"2000-01-01",
"2000-01-02",
"2000-01-02",
"2000-01-03",
"2000-01-03",
"2000-01-04",
]
)

with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
result = idx.get_duplicates()

# Each repeated date is expected exactly once in the result.
ex = DatetimeIndex(["2000-01-02", "2000-01-03"])
tm.assert_index_equal(result, ex)

def test_argmin_argmax(self):
idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
assert idx.argmin() == 1
Expand Down
11 changes: 2 additions & 9 deletions pandas/tests/indexes/multi/test_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,16 +251,13 @@ def test_duplicated_large(keep):
tm.assert_numpy_array_equal(result, expected)


def test_get_duplicates():
def test_duplicated2():
# TODO: more informative test name
# GH5873
for a in [101, 102]:
mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
assert not mi.has_duplicates

with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []]))

tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))

for n in range(1, 6): # 1st level shape
Expand All @@ -274,10 +271,6 @@ def test_get_duplicates():
assert len(mi) == (n + 1) * (m + 1)
assert not mi.has_duplicates

with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []]))

tm.assert_numpy_array_equal(
mi.duplicated(), np.zeros(len(mi), dtype="bool")
)
5 changes: 0 additions & 5 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2394,11 +2394,6 @@ def test_cached_properties_not_settable(self):
with pytest.raises(AttributeError, match="Can't set attribute"):
index.is_unique = False

def test_get_duplicates_deprecated(self):
# Calling the deprecated method must emit a FutureWarning; the index used
# here has no repeated values, and the return value is not checked.
index = pd.Index([1, 2, 3])
with tm.assert_produces_warning(FutureWarning):
index.get_duplicates()

def test_tab_complete_warning(self, ip):
# https://github.com/pandas-dev/pandas/issues/16409
pytest.importorskip("IPython", minversion="6.0.0")
Expand Down
10 changes: 0 additions & 10 deletions pandas/tests/indexes/timedeltas/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,16 +179,6 @@ def test_sort_values(self):

tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1]), check_dtype=False)

def test_get_duplicates(self):
# "2 day" is listed twice; "3 day" and "3day" are presumably parsed to the
# same timedelta, giving a second repeated value (confirm against the
# TimedeltaIndex string parser).
idx = TimedeltaIndex(["1 day", "2 day", "2 day", "3 day", "3day", "4day"])

with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
result = idx.get_duplicates()

# Each repeated value is expected exactly once in the result.
ex = TimedeltaIndex(["2 day", "3day"])
tm.assert_index_equal(result, ex)

def test_argmin_argmax(self):
idx = TimedeltaIndex(["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"])
assert idx.argmin() == 1
Expand Down