From 988542c77da6356ffb4b24f0245387d099a4ba5d Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Sat, 4 Apr 2020 19:10:41 +0700 Subject: [PATCH 1/8] DOC: Fix EX01 in DataFrame.drop_duplicates --- pandas/core/frame.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 71b755bbf9665..93dea57c7ee02 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4673,6 +4673,49 @@ def drop_duplicates( See Also -------- DataFrame.value_counts: Count unique combinations of columns. + + Examples + -------- + + Consider dataset containing ramen rating. + + >>> brand = ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'] + >>> df = pd.DataFrame({'brand': brand, + ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], + ... 'rating': [4, 4, 3.5, 15, 5]}, + ... index=['TH', 'TH', 'ID', 'ID', 'ID']) + + >>> df + brand style rating + TH Yum Yum cup 4.0 + TH Yum Yum cup 4.0 + ID Indomie cup 3.5 + ID Indomie pack 15.0 + ID Indomie pack 5.0 + + By default, it removes duplicate rows based on all columns + + >>> df.drop_duplicates() + brand style rating + TH Yum Yum cup 4.0 + ID Indomie cup 3.5 + ID Indomie pack 15.0 + ID Indomie pack 5.0 + + To remove duplicates on specific column(s), use ``subset`` + + >>> df.drop_duplicates(subset=['brand']) + brand style rating + TH Yum Yum cup 4.0 + ID Indomie cup 3.5 + + To remove drop duplicates and keep last occurences, use ``keep`` + + >>> df.drop_duplicates(subset=['brand', 'style'], keep='last') + brand style rating + TH Yum Yum cup 4.0 + ID Indomie cup 3.5 + ID Indomie pack 5.0 """ if self.empty: return self.copy() From 1618a0c4d75412e9e81fbc8f3e1794edb0813197 Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Sat, 4 Apr 2020 19:12:35 +0700 Subject: [PATCH 2/8] DOC: Fix EX01 in DataFrame.drop_duplicates --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py 
index 93dea57c7ee02..3de4890cdf083 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4709,7 +4709,7 @@ def drop_duplicates( TH Yum Yum cup 4.0 ID Indomie cup 3.5 - To remove drop duplicates and keep last occurences, use ``keep`` + To remove duplicates and keep last occurences, use ``keep`` >>> df.drop_duplicates(subset=['brand', 'style'], keep='last') brand style rating From f238c81c1d9778fc38047ff97def6f26357fe7c3 Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Sun, 5 Apr 2020 11:06:28 +0700 Subject: [PATCH 3/8] Remove empty lines --- pandas/core/frame.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3de4890cdf083..3bae18ab02033 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4676,7 +4676,6 @@ def drop_duplicates( Examples -------- - Consider dataset containing ramen rating. >>> brand = ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'] @@ -4684,7 +4683,6 @@ def drop_duplicates( ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], ... 'rating': [4, 4, 3.5, 15, 5]}, ... index=['TH', 'TH', 'ID', 'ID', 'ID']) - >>> df brand style rating TH Yum Yum cup 4.0 From 71b8e05b6a9ba6d6499fc10736201e2ed06fe7db Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Wed, 8 Apr 2020 09:06:17 +0700 Subject: [PATCH 4/8] Remove index name --- pandas/core/frame.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3bae18ab02033..cfd5d557e0a2b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4681,39 +4681,38 @@ def drop_duplicates( >>> brand = ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'] >>> df = pd.DataFrame({'brand': brand, ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], - ... 'rating': [4, 4, 3.5, 15, 5]}, - ... index=['TH', 'TH', 'ID', 'ID', 'ID']) + ... 
'rating': [4, 4, 3.5, 15, 5]}) >>> df brand style rating - TH Yum Yum cup 4.0 - TH Yum Yum cup 4.0 - ID Indomie cup 3.5 - ID Indomie pack 15.0 - ID Indomie pack 5.0 + 0 Yum Yum cup 4.0 + 1 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + 3 Indomie pack 15.0 + 4 Indomie pack 5.0 By default, it removes duplicate rows based on all columns >>> df.drop_duplicates() brand style rating - TH Yum Yum cup 4.0 - ID Indomie cup 3.5 - ID Indomie pack 15.0 - ID Indomie pack 5.0 + 0 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + 3 Indomie pack 15.0 + 4 Indomie pack 5.0 To remove duplicates on specific column(s), use ``subset`` >>> df.drop_duplicates(subset=['brand']) brand style rating - TH Yum Yum cup 4.0 - ID Indomie cup 3.5 + 0 Yum Yum cup 4.0 + 2 Indomie cup 3.5 To remove duplicates and keep last occurences, use ``keep`` >>> df.drop_duplicates(subset=['brand', 'style'], keep='last') brand style rating - TH Yum Yum cup 4.0 - ID Indomie cup 3.5 - ID Indomie pack 5.0 + 1 Yum Yum cup 4.0 + 2 Indomie cup 3.5 + 4 Indomie pack 5.0 """ if self.empty: return self.copy() From caffe847e79b7d9f4dd14cfa194d01a5f1ac1d78 Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Wed, 8 Apr 2020 21:03:21 +0700 Subject: [PATCH 5/8] Fix indentation --- pandas/core/frame.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cfd5d557e0a2b..9abf50204bb47 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4679,9 +4679,11 @@ def drop_duplicates( Consider dataset containing ramen rating. >>> brand = ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'] - >>> df = pd.DataFrame({'brand': brand, + >>> df = pd.DataFrame({ + ... 'brand': brand, ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], - ... 'rating': [4, 4, 3.5, 15, 5]}) + ... 'rating': [4, 4, 3.5, 15, 5] + ... 
}) >>> df brand style rating 0 Yum Yum cup 4.0 From 6ff5453ba28e917b58ce18345ae428ad2c1e40a0 Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Thu, 9 Apr 2020 09:10:01 +0700 Subject: [PATCH 6/8] Add indentation Co-Authored-By: Marc Garcia --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9abf50204bb47..c2db0684c22aa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4680,9 +4680,9 @@ def drop_duplicates( >>> brand = ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'] >>> df = pd.DataFrame({ - ... 'brand': brand, - ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], - ... 'rating': [4, 4, 3.5, 15, 5] + ... 'brand': brand, + ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], + ... 'rating': [4, 4, 3.5, 15, 5] ... }) >>> df brand style rating From cbabca407ac3084d67f63fd057e77ac54f5b1d5d Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Thu, 9 Apr 2020 09:12:03 +0700 Subject: [PATCH 7/8] move brand to fits inline --- pandas/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c2db0684c22aa..364826efeade2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4678,9 +4678,8 @@ def drop_duplicates( -------- Consider dataset containing ramen rating. - >>> brand = ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'] >>> df = pd.DataFrame({ - ... 'brand': brand, + ... 'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'], ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], ... 'rating': [4, 4, 3.5, 15, 5] ... 
}) From 27a20457b34728e4f4265ae2f35aececad10b17d Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Thu, 9 Apr 2020 09:33:39 +0700 Subject: [PATCH 8/8] Add period on sentences --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 364826efeade2..1f73efed82cb9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4691,7 +4691,7 @@ def drop_duplicates( 3 Indomie pack 15.0 4 Indomie pack 5.0 - By default, it removes duplicate rows based on all columns + By default, it removes duplicate rows based on all columns. >>> df.drop_duplicates() brand style rating @@ -4700,14 +4700,14 @@ def drop_duplicates( 3 Indomie pack 15.0 4 Indomie pack 5.0 - To remove duplicates on specific column(s), use ``subset`` + To remove duplicates on specific column(s), use ``subset``. >>> df.drop_duplicates(subset=['brand']) brand style rating 0 Yum Yum cup 4.0 2 Indomie cup 3.5 - To remove duplicates and keep last occurences, use ``keep`` + To remove duplicates and keep last occurrences, use ``keep``. >>> df.drop_duplicates(subset=['brand', 'style'], keep='last') brand style rating