From f7e098a5cffdf26ee929958f63421ceb489e1b0a Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 12:24:13 +0100 Subject: [PATCH 01/15] add docstring index.drop_duplicates moving away from shared docs and write specific docstring for Index and Series object --- pandas/core/indexes/base.py | 46 ++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7e6ae88a26e7c..d0ec3a4252755 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4017,8 +4017,52 @@ def unique(self, level=None): result = super(Index, self).unique() return self._shallow_copy(result) - @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs) + #@Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs) def drop_duplicates(self, keep='first'): + """ + Return Index with duplicate values removed. + + The drop_duplicates method can remove occurences or whole sets + of duplicated entries in a pandas.Index object. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + - 'first' : Drop duplicates except for the first occurrence. + - 'last' : Drop duplicates except for the last occurrence. + - ``False`` : Drop all duplicates. + + Returns + ------- + deduplicated : Index + + See Also + -------- + pandas.Series.drop_duplicates : equivalent method on pandas.Series + + Examples + -------- + Generate an index with duplicate values. + >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) + + With the 'keep' parameter, the selection behaviour of duplicated values + can be changed. The value 'first' keeps the first occurrence for each + set of duplicated entries. The default value of keep is 'first'. + + >>> idx.drop_duplicates(keep='first') + Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object') + + The value 'last' keeps the last occurrence for each set of duplicated + entries. 
+ + >>> idx.drop_duplicates(keep='last') + Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object') + + The value ``False`` discards all sets of duplicated entries. + + >>> idx.drop_duplicates(keep=False) + Index(['cow', 'beetle', 'hippo'], dtype='object') + """ return super(Index, self).drop_duplicates(keep=keep) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) From 17e384696a48b6d19d775199c166f6d47bbf7051 Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 12:34:17 +0100 Subject: [PATCH 02/15] clarify: pandas.Index --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d0ec3a4252755..73730aa9a6a74 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4042,7 +4042,7 @@ def drop_duplicates(self, keep='first'): Examples -------- - Generate an index with duplicate values. + Generate an pandas.Index with duplicate values. >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) With the 'keep' parameter, the selection behaviour of duplicated values From dd9d3a4baf65dc98357ff52e53880d01cfb70fc1 Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 12:46:45 +0100 Subject: [PATCH 03/15] adjust how parameter False is denoted --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 73730aa9a6a74..dac601ed8733e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4027,7 +4027,7 @@ def drop_duplicates(self, keep='first'): Parameters ---------- - keep : {'first', 'last', False}, default 'first' + keep : {'first', 'last', ``False``}, default 'first' - 'first' : Drop duplicates except for the first occurrence. - 'last' : Drop duplicates except for the last occurrence. - ``False`` : Drop all duplicates. 
From 13406a1b3e7669ad50399f3177feb51385cfe46c Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 12:48:39 +0100 Subject: [PATCH 04/15] add docstring pandas.Series.drop_duplicates --- pandas/core/series.py | 71 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 069f0372ab6e1..6a5fba8bfe8ba 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1316,8 +1316,77 @@ def unique(self): return result - @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs) + #@Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): + """ + Return Series with duplicate values removed. + + The drop_duplicates method can remove occurences or whole sets + of duplicated entries in a pandas.Index object. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + - 'first' : Drop duplicates except for the first occurrence. + - 'last' : Drop duplicates except for the last occurrence. + - ``False`` : Drop all duplicates. + inplace : boolean, default ``False`` + If ``True``, performs operation inplace and returns None. + + Returns + ------- + deduplicated : Series + + See Also + -------- + pandas.Index.drop_duplicates : equivalent method on pandas.Index + + Examples + -------- + Generate an Series with duplicated entries. + >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'], + ... name='animal') + >>> s + 0 lama + 1 cow + 2 lama + 3 beetle + 4 lama + 5 hippo + Name: animal, dtype: object + + With the 'keep' parameter, the selection behaviour of duplicated values + can be changed. The value 'first' keeps the first occurrence for each + set of duplicated entries. The default value of keep is 'first'. 
+ + >>> s.drop_duplicates() + 0 lama + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: object + + The value 'last' for parameter 'keep' keeps the last occurrence for + each set of duplicated entries. + + >>> s.drop_duplicates(keep='last') + 1 cow + 3 beetle + 4 lama + 5 hippo + Name: animal, dtype: object + + The value ``False`` for parameter 'keep' discards all sets of + duplicated entries. Setting the value of 'inplace' to ``True`` performs + the operation inplace and returns ``None``. + + >>> s.drop_duplicates(keep=False, inplace=True) + >>> s + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: object + """ return super(Series, self).drop_duplicates(keep=keep, inplace=inplace) @Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs) From bcfceafaed54aca930eef8bd7f42bb39dac3e6e4 Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 12:51:49 +0100 Subject: [PATCH 05/15] remove appender decorator --- pandas/core/indexes/base.py | 1 - pandas/core/series.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dac601ed8733e..7e61311f5fb64 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4017,7 +4017,6 @@ def unique(self, level=None): result = super(Index, self).unique() return self._shallow_copy(result) - #@Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs) def drop_duplicates(self, keep='first'): """ Return Index with duplicate values removed. diff --git a/pandas/core/series.py b/pandas/core/series.py index 6a5fba8bfe8ba..379fb0f9908b7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1316,7 +1316,6 @@ def unique(self): return result - #@Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): """ Return Series with duplicate values removed. 
From 2876b26e13e89b5f526e38596cf41ca06756f93b Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 12:59:24 +0100 Subject: [PATCH 06/15] add whitespace so that html looks pretty --- pandas/core/indexes/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7e61311f5fb64..0b758a4c6bea4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4042,6 +4042,7 @@ def drop_duplicates(self, keep='first'): Examples -------- Generate an pandas.Index with duplicate values. + >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) With the 'keep' parameter, the selection behaviour of duplicated values From 863b96113fad0fbab0ac0159827219b22641a66b Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 12:59:54 +0100 Subject: [PATCH 07/15] remove typo --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 379fb0f9908b7..0df858ae7511c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1321,7 +1321,7 @@ def drop_duplicates(self, keep='first', inplace=False): Return Series with duplicate values removed. The drop_duplicates method can remove occurences or whole sets - of duplicated entries in a pandas.Index object. + of duplicated entries in a pandas.Series object. 
Parameters ---------- From b41e1d1d8aebfc4172ca3bc231e49e07f11cb097 Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 13:08:17 +0100 Subject: [PATCH 08/15] make html rendering prettier --- pandas/core/series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0df858ae7511c..b9a7eb8dc0c2e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1325,7 +1325,7 @@ def drop_duplicates(self, keep='first', inplace=False): Parameters ---------- - keep : {'first', 'last', False}, default 'first' + keep : {'first', 'last', ``False``}, default 'first' - 'first' : Drop duplicates except for the first occurrence. - 'last' : Drop duplicates except for the last occurrence. - ``False`` : Drop all duplicates. @@ -1343,6 +1343,7 @@ def drop_duplicates(self, keep='first', inplace=False): Examples -------- Generate an Series with duplicated entries. + >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'], ... name='animal') >>> s From 6c8de229d20cd2efa70b30f3b4ca4cd33140b9a7 Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 13:16:19 +0100 Subject: [PATCH 09/15] remove redundant whitespace --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0b758a4c6bea4..e8dda0e0eab01 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4042,7 +4042,7 @@ def drop_duplicates(self, keep='first'): Examples -------- Generate an pandas.Index with duplicate values. 
- + >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) With the 'keep' parameter, the selection behaviour of duplicated values From 9f8e43812068b2bf1e5f9f33a810602442e23494 Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 13:17:21 +0100 Subject: [PATCH 10/15] remove shared docs and appender function decorator in core/base --- pandas/core/base.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 280b8849792e3..fd039480fc6f1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1184,24 +1184,6 @@ def searchsorted(self, value, side='left', sorter=None): # needs coercion on the key (DatetimeIndex does already) return self.values.searchsorted(value, side=side, sorter=sorter) - _shared_docs['drop_duplicates'] = ( - """Return %(klass)s with duplicate values removed - - Parameters - ---------- - - keep : {'first', 'last', False}, default 'first' - - ``first`` : Drop duplicates except for the first occurrence. - - ``last`` : Drop duplicates except for the last occurrence. - - False : Drop all duplicates. 
- %(inplace)s - - Returns - ------- - deduplicated : %(klass)s - """) - - @Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') if isinstance(self, ABCIndexClass): From 0d604ac822bbdbf35b434c9a1989399be5d7ebee Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 13:48:41 +0100 Subject: [PATCH 11/15] update description of keep parameters in examples --- pandas/core/indexes/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8dda0e0eab01..74e70d54a1c4c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4045,8 +4045,8 @@ def drop_duplicates(self, keep='first'): >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) - With the 'keep' parameter, the selection behaviour of duplicated values - can be changed. The value 'first' keeps the first occurrence for each + The `keep` parameter controls which duplicate values are removed. + The value 'first' keeps the first occurrence for each set of duplicated entries. The default value of keep is 'first'. >>> idx.drop_duplicates(keep='first') From d92c124dd5668bb7d983473466fb3355f8dd5f5c Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 13:55:37 +0100 Subject: [PATCH 12/15] in see also, remove pandas. 
--- pandas/core/indexes/base.py | 2 +- pandas/core/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 74e70d54a1c4c..9517829f799f2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4037,7 +4037,7 @@ def drop_duplicates(self, keep='first'): See Also -------- - pandas.Series.drop_duplicates : equivalent method on pandas.Series + Series.drop_duplicates : equivalent method on Series Examples -------- diff --git a/pandas/core/series.py b/pandas/core/series.py index b9a7eb8dc0c2e..4957740d63407 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1338,7 +1338,7 @@ def drop_duplicates(self, keep='first', inplace=False): See Also -------- - pandas.Index.drop_duplicates : equivalent method on pandas.Index + Index.drop_duplicates : equivalent method on Index Examples -------- From 0453aea1ebbdcd931c46aaf80c02bd46d84ff381 Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 15:39:11 +0100 Subject: [PATCH 13/15] removed redundant extended summary --- pandas/core/indexes/base.py | 3 --- pandas/core/series.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9517829f799f2..cc4819ec26d0b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4021,9 +4021,6 @@ def drop_duplicates(self, keep='first'): """ Return Index with duplicate values removed. - The drop_duplicates method can remove occurences or whole sets - of duplicated entries in a pandas.Index object. - Parameters ---------- keep : {'first', 'last', ``False``}, default 'first' diff --git a/pandas/core/series.py b/pandas/core/series.py index 4957740d63407..571d592bd9989 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1320,9 +1320,6 @@ def drop_duplicates(self, keep='first', inplace=False): """ Return Series with duplicate values removed. 
- The drop_duplicates method can remove occurences or whole sets - of duplicated entries in a pandas.Series object. - Parameters ---------- keep : {'first', 'last', ``False``}, default 'first' From c300ea651d3641645ac9b947aacd3a8ecbc4c149 Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 16:20:27 +0100 Subject: [PATCH 14/15] reference DataFrame.drop_duplicates and Series.duplicated --- pandas/core/series.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 571d592bd9989..59a7a00bddb78 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1336,6 +1336,8 @@ def drop_duplicates(self, keep='first', inplace=False): See Also -------- Index.drop_duplicates : equivalent method on Index + DataFrame.drop_duplicates: equivalent method on DataFrame + Series.duplicated: related method on Series Examples -------- From 8763f332d1ca312cd4685fc9507b34061642da2b Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Sat, 10 Mar 2018 16:38:59 +0100 Subject: [PATCH 15/15] update See Also --- pandas/core/indexes/base.py | 3 +++ pandas/core/series.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cc4819ec26d0b..ae81d0235d2dc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4035,6 +4035,9 @@ def drop_duplicates(self, keep='first'): See Also -------- Series.drop_duplicates : equivalent method on Series + DataFrame.drop_duplicates : equivalent method on DataFrame + Index.duplicated : related method on Index, indicating duplicate + Index values. 
Examples -------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 59a7a00bddb78..090f599c860ae 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1336,8 +1336,9 @@ def drop_duplicates(self, keep='first', inplace=False): See Also -------- Index.drop_duplicates : equivalent method on Index - DataFrame.drop_duplicates: equivalent method on DataFrame - Series.duplicated: related method on Series + DataFrame.drop_duplicates : equivalent method on DataFrame + Series.duplicated : related method on Series, indicating duplicate + Series values. Examples --------