Skip to content

Commit e5e4ae9

Browse files
DOC: update the pandas.Index.drop_duplicates and pandas.Series.drop_duplicates docstring (#20114)
1 parent d7bcb22 commit e5e4ae9

File tree

3 files changed

+115
-20
lines changed

3 files changed

+115
-20
lines changed

pandas/core/base.py

-18
Original file line numberDiff line numberDiff line change
@@ -1184,24 +1184,6 @@ def searchsorted(self, value, side='left', sorter=None):
11841184
# needs coercion on the key (DatetimeIndex does already)
11851185
return self.values.searchsorted(value, side=side, sorter=sorter)
11861186

1187-
_shared_docs['drop_duplicates'] = (
1188-
"""Return %(klass)s with duplicate values removed
1189-
1190-
Parameters
1191-
----------
1192-
1193-
keep : {'first', 'last', False}, default 'first'
1194-
- ``first`` : Drop duplicates except for the first occurrence.
1195-
- ``last`` : Drop duplicates except for the last occurrence.
1196-
- False : Drop all duplicates.
1197-
%(inplace)s
1198-
1199-
Returns
1200-
-------
1201-
deduplicated : %(klass)s
1202-
""")
1203-
1204-
@Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs)
12051187
def drop_duplicates(self, keep='first', inplace=False):
12061188
inplace = validate_bool_kwarg(inplace, 'inplace')
12071189
if isinstance(self, ABCIndexClass):

pandas/core/indexes/base.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -4017,8 +4017,52 @@ def unique(self, level=None):
40174017
result = super(Index, self).unique()
40184018
return self._shallow_copy(result)
40194019

4020-
@Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs)
40214020
def drop_duplicates(self, keep='first'):
4021+
"""
4022+
Return Index with duplicate values removed.
4023+
4024+
Parameters
4025+
----------
4026+
keep : {'first', 'last', ``False``}, default 'first'
4027+
- 'first' : Drop duplicates except for the first occurrence.
4028+
- 'last' : Drop duplicates except for the last occurrence.
4029+
- ``False`` : Drop all duplicates.
4030+
4031+
Returns
4032+
-------
4033+
deduplicated : Index
4034+
4035+
See Also
4036+
--------
4037+
Series.drop_duplicates : equivalent method on Series
4038+
DataFrame.drop_duplicates : equivalent method on DataFrame
4039+
Index.duplicated : related method on Index, indicating duplicate
4040+
Index values.
4041+
4042+
Examples
4043+
--------
4044+
Generate a pandas.Index with duplicate values.
4045+
4046+
>>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
4047+
4048+
The `keep` parameter controls which duplicate values are removed.
4049+
The value 'first' keeps the first occurrence for each
4050+
set of duplicated entries. The default value of keep is 'first'.
4051+
4052+
>>> idx.drop_duplicates(keep='first')
4053+
Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
4054+
4055+
The value 'last' keeps the last occurrence for each set of duplicated
4056+
entries.
4057+
4058+
>>> idx.drop_duplicates(keep='last')
4059+
Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
4060+
4061+
The value ``False`` discards all sets of duplicated entries.
4062+
4063+
>>> idx.drop_duplicates(keep=False)
4064+
Index(['cow', 'beetle', 'hippo'], dtype='object')
4065+
"""
40224066
return super(Index, self).drop_duplicates(keep=keep)
40234067

40244068
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)

pandas/core/series.py

+70-1
Original file line numberDiff line numberDiff line change
@@ -1316,8 +1316,77 @@ def unique(self):
13161316

13171317
return result
13181318

1319-
@Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs)
13201319
def drop_duplicates(self, keep='first', inplace=False):
1320+
"""
1321+
Return Series with duplicate values removed.
1322+
1323+
Parameters
1324+
----------
1325+
keep : {'first', 'last', ``False``}, default 'first'
1326+
- 'first' : Drop duplicates except for the first occurrence.
1327+
- 'last' : Drop duplicates except for the last occurrence.
1328+
- ``False`` : Drop all duplicates.
1329+
inplace : boolean, default ``False``
1330+
If ``True``, performs operation inplace and returns None.
1331+
1332+
Returns
1333+
-------
1334+
deduplicated : Series
1335+
1336+
See Also
1337+
--------
1338+
Index.drop_duplicates : equivalent method on Index
1339+
DataFrame.drop_duplicates : equivalent method on DataFrame
1340+
Series.duplicated : related method on Series, indicating duplicate
1341+
Series values.
1342+
1343+
Examples
1344+
--------
1345+
Generate a Series with duplicated entries.
1346+
1347+
>>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'],
1348+
... name='animal')
1349+
>>> s
1350+
0 lama
1351+
1 cow
1352+
2 lama
1353+
3 beetle
1354+
4 lama
1355+
5 hippo
1356+
Name: animal, dtype: object
1357+
1358+
With the 'keep' parameter, the selection behaviour of duplicated values
1359+
can be changed. The value 'first' keeps the first occurrence for each
1360+
set of duplicated entries. The default value of keep is 'first'.
1361+
1362+
>>> s.drop_duplicates()
1363+
0 lama
1364+
1 cow
1365+
3 beetle
1366+
5 hippo
1367+
Name: animal, dtype: object
1368+
1369+
The value 'last' for parameter 'keep' keeps the last occurrence for
1370+
each set of duplicated entries.
1371+
1372+
>>> s.drop_duplicates(keep='last')
1373+
1 cow
1374+
3 beetle
1375+
4 lama
1376+
5 hippo
1377+
Name: animal, dtype: object
1378+
1379+
The value ``False`` for parameter 'keep' discards all sets of
1380+
duplicated entries. Setting the value of 'inplace' to ``True`` performs
1381+
the operation inplace and returns ``None``.
1382+
1383+
>>> s.drop_duplicates(keep=False, inplace=True)
1384+
>>> s
1385+
1 cow
1386+
3 beetle
1387+
5 hippo
1388+
Name: animal, dtype: object
1389+
"""
13211390
return super(Series, self).drop_duplicates(keep=keep, inplace=inplace)
13221391

13231392
@Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs)

0 commit comments

Comments
 (0)