diff --git a/doc/redirects.csv b/doc/redirects.csv index 4d171bc3d400d..599ad6d28a8f5 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -636,7 +636,6 @@ generated/pandas.Index.equals,../reference/api/pandas.Index.equals generated/pandas.Index.factorize,../reference/api/pandas.Index.factorize generated/pandas.Index.fillna,../reference/api/pandas.Index.fillna generated/pandas.Index.format,../reference/api/pandas.Index.format -generated/pandas.Index.get_duplicates,../reference/api/pandas.Index.get_duplicates generated/pandas.Index.get_indexer_for,../reference/api/pandas.Index.get_indexer_for generated/pandas.Index.get_indexer,../reference/api/pandas.Index.get_indexer generated/pandas.Index.get_indexer_non_unique,../reference/api/pandas.Index.get_indexer_non_unique diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index c155b5e3fcb37..23f8424213574 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -152,7 +152,6 @@ Selecting Index.asof Index.asof_locs Index.contains - Index.get_duplicates Index.get_indexer Index.get_indexer_for Index.get_indexer_non_unique diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 470209a7f4a33..8b1d4c91f3be1 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -548,7 +548,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously properties :attr:`DataFrame.is_copy`, :attr:`Series.is_copy` (:issue:`18812`) - Removed the previously deprecated :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`) - Removed the previously deprecated :meth:`DataFrame.ftypes`, :meth:`Series.ftypes`, :meth:`Series.ftype` (:issue:`26744`) -- Removed the previously deprecated :meth:`Index.get_duplicated`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`) +- Removed the previously deprecated :meth:`Index.get_duplicates`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`) - Removed the previously deprecated :meth:`Series.clip_upper`, :meth:`Series.clip_lower`, :meth:`DataFrame.clip_upper`, :meth:`DataFrame.clip_lower` (:issue:`24203`) - Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`) - Removed the previously deprecated :attr:`DatetimeIndex.offset` (:issue:`20730`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9f96d09b0d3dd..b99e60f8c6278 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2138,68 +2138,6 @@ def duplicated(self, keep="first"): """ return super().duplicated(keep=keep) - def get_duplicates(self): - """ - Extract duplicated index elements. - - .. deprecated:: 0.23.0 - Use idx[idx.duplicated()].unique() instead - - Returns a sorted list of index elements which appear more than once in - the index. - - Returns - ------- - array-like - List of duplicated indexes. - - See Also - -------- - Index.duplicated : Return boolean array denoting duplicates. - Index.drop_duplicates : Return Index with duplicates removed. - - Examples - -------- - - Works on different Index of types. - - >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() # doctest: +SKIP - [2, 3] - - Note that for a DatetimeIndex, it does not return a list but a new - DatetimeIndex: - - >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03', - ... '2018-01-03', '2018-01-04', '2018-01-04'], - ... format='%Y-%m-%d') - >>> pd.Index(dates).get_duplicates() # doctest: +SKIP - DatetimeIndex(['2018-01-03', '2018-01-04'], - dtype='datetime64[ns]', freq=None) - - Sorts duplicated elements even when indexes are unordered. - - >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() # doctest: +SKIP - [2, 3] - - Return empty array-like structure when all elements are unique. - - >>> pd.Index([1, 2, 3, 4]).get_duplicates() # doctest: +SKIP - [] - >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'], - ... format='%Y-%m-%d') - >>> pd.Index(dates).get_duplicates() # doctest: +SKIP - DatetimeIndex([], dtype='datetime64[ns]', freq=None) - """ - warnings.warn( - "'get_duplicates' is deprecated and will be removed in " - "a future release. You can use " - "idx[idx.duplicated()].unique() instead", - FutureWarning, - stacklevel=2, - ) - - return self[self.duplicated()].unique() - def _get_unique_index(self, dropna=False): """ Returns an index containing unique values. diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 4a38e3a146c0e..e3fa6a8321d5f 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -188,25 +188,6 @@ def test_string_index_series_name_converted(self): result = df.T["1/3/2000"] assert result.name == df.index[2] - def test_get_duplicates(self): - idx = DatetimeIndex( - [ - "2000-01-01", - "2000-01-02", - "2000-01-02", - "2000-01-03", - "2000-01-03", - "2000-01-04", - ] - ) - - with tm.assert_produces_warning(FutureWarning): - # Deprecated - see GH20239 - result = idx.get_duplicates() - - ex = DatetimeIndex(["2000-01-02", "2000-01-03"]) - tm.assert_index_equal(result, ex) - def test_argmin_argmax(self): idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) assert idx.argmin() == 1 diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 518bd093b23b1..ee1f068b92df1 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -251,16 +251,13 @@ def test_duplicated_large(keep): tm.assert_numpy_array_equal(result, expected) -def test_get_duplicates(): +def test_duplicated2(): + # TODO: more informative test name # GH5873 for a in [101, 102]: mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) assert not mi.has_duplicates - with tm.assert_produces_warning(FutureWarning): - # Deprecated - see GH20239 - assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []])) - tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool")) for n in range(1, 6): # 1st level shape @@ -274,10 +271,6 @@ def test_get_duplicates(): assert len(mi) == (n + 1) * (m + 1) assert not mi.has_duplicates - with tm.assert_produces_warning(FutureWarning): - # Deprecated - see GH20239 - assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []])) - tm.assert_numpy_array_equal( mi.duplicated(), np.zeros(len(mi), dtype="bool") ) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 77d81a4a9566e..c0c677b076e2c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2394,11 +2394,6 @@ def test_cached_properties_not_settable(self): with pytest.raises(AttributeError, match="Can't set attribute"): index.is_unique = False - def test_get_duplicates_deprecated(self): - index = pd.Index([1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - index.get_duplicates() - def test_tab_complete_warning(self, ip): # https://github.com/pandas-dev/pandas/issues/16409 pytest.importorskip("IPython", minversion="6.0.0") diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index d59b6c18f6042..a62bd7f39e039 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -179,16 +179,6 @@ def test_sort_values(self): tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1]), check_dtype=False) - def test_get_duplicates(self): - idx = TimedeltaIndex(["1 day", "2 day", "2 day", "3 day", "3day", "4day"]) - - with tm.assert_produces_warning(FutureWarning): - # Deprecated - see GH20239 - result = idx.get_duplicates() - - ex = TimedeltaIndex(["2 day", "3day"]) - tm.assert_index_equal(result, ex) - def test_argmin_argmax(self): idx = TimedeltaIndex(["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]) assert idx.argmin() == 1