@@ -1713,38 +1713,55 @@ def get_duplicates(self):
1713
1713
"""
1714
1714
Extract duplicated index elements.
1715
1715
1716
- This function returns a sorted list of index elements which appear more
1717
- than once in the index.
1716
+ Returns a sorted list of index elements which appear more than once in
1717
+ the index.
1718
1718
1719
1719
Returns
1720
1720
-------
1721
1721
array-like
1722
- List of duplicated indices .
1722
+ List of duplicated indexes.
1723
1723
1724
1724
See Also
1725
1725
--------
1726
- :meth:` Index.duplicated` : Return boolean array denoting duplicates.
1727
- :meth:` Index.drop_duplicates` : Return Index with duplicates removed.
1726
+ Index.duplicated : Return boolean array denoting duplicates.
1727
+ Index.drop_duplicates : Return Index with duplicates removed.
1728
1728
1729
1729
Examples
1730
1730
--------
1731
- >>> pd.Index([1, 2, 3, 4]).get_duplicates()
1732
- []
1731
+
1732
+ Works on different types of Index.
1733
+
1733
1734
>>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates()
1734
1735
[2, 3]
1735
- >>> pd.Index([1, 2, 3, 2 , 3, 4 , 3]).get_duplicates()
1736
- [2, 3]
1736
+ >>> pd.Index([1., 2., 2., 3., 3., 3., 4.]).get_duplicates()
1737
+ [2.0, 3.0]
1737
1738
>>> pd.Index(['a', 'b', 'b', 'c', 'c', 'c', 'd']).get_duplicates()
1738
1739
['b', 'c']
1739
- >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02',
1740
- ... '2018-01-03', '2018-01-03'],
1740
+ >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03',
1741
+ ... '2018-01-03', '2018-01-04', '2018-01-04'],
1742
+ ... format='%Y-%m-%d')
1743
+ >>> pd.Index(dates).get_duplicates()
1744
+ DatetimeIndex(['2018-01-03', '2018-01-04'],
1745
+ dtype='datetime64[ns]', freq=None)
1746
+
1747
+ Sorts duplicated elements even when indexes are unordered.
1748
+
1749
+ >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates()
1750
+ [2, 3]
1751
+
1752
+ Returns an empty array-like structure when all elements are unique.
1753
+
1754
+ >>> pd.Index([1, 2, 3, 4]).get_duplicates()
1755
+ []
1756
+ >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'],
1741
1757
... format='%Y-%m-%d')
1742
- >>> pd.Index(pd.to_datetime( dates, format='%Y-%m-%d') ).get_duplicates()
1743
- DatetimeIndex(['2018-01-03' ], dtype='datetime64[ns]', freq=None)
1758
+ >>> pd.Index(dates).get_duplicates()
1759
+ DatetimeIndex([], dtype='datetime64[ns]', freq=None)
1744
1760
1745
1761
Notes
1746
1762
-----
1747
- Returns empty list in case all index elements are unique.
1763
+ For datetime-like indexes, the function is overridden so that the
1764
+ result is converted to a DatetimeIndex.
1748
1765
"""
1749
1766
from collections import defaultdict
1750
1767
counter = defaultdict (lambda : 0 )
0 commit comments