Skip to content

Commit 8d30b6e

Browse files
authored
ENH: Add lazy copy for drop duplicates (#50431)
* ENH: Add lazy copy for drop duplicates
* Improve performance
1 parent 032d112 commit 8d30b6e

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

pandas/core/frame.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -3718,6 +3718,10 @@ def _getitem_bool_array(self, key):
37183718
# check_bool_indexer will throw exception if Series key cannot
37193719
# be reindexed to match DataFrame rows
37203720
key = check_bool_indexer(self.index, key)
3721+
3722+
if key.all():
3723+
return self.copy(deep=None)
3724+
37213725
indexer = key.nonzero()[0]
37223726
return self._take_with_is_copy(indexer, axis=0)
37233727

@@ -6418,7 +6422,7 @@ def drop_duplicates(
64186422
4 Indomie pack 5.0
64196423
"""
64206424
if self.empty:
6421-
return self.copy()
6425+
return self.copy(deep=None)
64226426

64236427
inplace = validate_bool_kwarg(inplace, "inplace")
64246428
ignore_index = validate_bool_kwarg(ignore_index, "ignore_index")

pandas/tests/copy_view/test_methods.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -369,10 +369,11 @@ def test_head_tail(method, using_copy_on_write):
369369
tm.assert_frame_equal(df, df_orig)
370370

371371

372-
def test_assign(using_copy_on_write):
372+
@pytest.mark.parametrize("method", ["assign", "drop_duplicates"])
373+
def test_assign_drop_duplicates(using_copy_on_write, method):
373374
df = DataFrame({"a": [1, 2, 3]})
374375
df_orig = df.copy()
375-
df2 = df.assign()
376+
df2 = getattr(df, method)()
376377
df2._mgr._verify_integrity()
377378

378379
if using_copy_on_write:

0 commit comments

Comments
 (0)