From c573cb7f20fa544e8ff9f43b14c7b66369e0e67c Mon Sep 17 00:00:00 2001 From: manlattan Date: Fri, 19 Jan 2024 16:51:44 +0530 Subject: [PATCH] BUG: DataFrame.drop_duplicates method fails when a column with a list dtype is present #56784 --- pandas/core/frame.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e093d551f3ead..40b43183b997e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6748,6 +6748,23 @@ def drop_duplicates( DataFrame or None DataFrame with duplicates removed or None if ``inplace=True``. + Notes + ----- + To handle unhashable objects such as lists, convert each list in the + column to a tuple before calling ``drop_duplicates``. + + >>> df = pd.DataFrame([ + ... {'number': 1, 'item_ids': [1, 2, 3]}, + ... {'number': 1, 'item_ids': [1, 2, 3]}, + ... ]) + + >>> df['item_ids'] = df['item_ids'].apply(tuple) + >>> df.drop_duplicates(inplace=True) + >>> df['item_ids'] = df['item_ids'].apply(list) + >>> df + number item_ids + 0 1 [1, 2, 3] + See Also -------- DataFrame.value_counts: Count unique combinations of columns.