Skip to content

Commit 9122952

Browse files
HyunTruth authored and datapythonista committed
BUG: DataFrame.drop_duplicates for empty DataFrame raises exception (#22394)
1 parent 982f309 commit 9122952

File tree

3 files changed

+24
-1
lines changed

3 files changed

+24
-1
lines changed

doc/source/whatsnew/v0.24.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -723,7 +723,7 @@ Reshaping
723723
- Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`)
724724
- Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`)
725725
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`)
726-
-
726+
- Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`)
727727

728728
Build Changes
729729
^^^^^^^^^^^^^

pandas/core/frame.py

+6
Original file line numberDiff line numberDiff line change
@@ -4325,6 +4325,9 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False):
43254325
-------
43264326
deduplicated : DataFrame
43274327
"""
4328+
if self.empty:
4329+
return self.copy()
4330+
43284331
inplace = validate_bool_kwarg(inplace, 'inplace')
43294332
duplicated = self.duplicated(subset, keep=keep)
43304333

@@ -4359,6 +4362,9 @@ def duplicated(self, subset=None, keep='first'):
43594362
from pandas.core.sorting import get_group_index
43604363
from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT
43614364

4365+
if self.empty:
4366+
return Series()
4367+
43624368
def f(vals):
43634369
labels, shape = algorithms.factorize(
43644370
vals, size_hint=min(len(self), _SIZE_HINT_LIMIT))

pandas/tests/frame/test_duplicates.py

+17
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,23 @@ def test_drop_duplicates_tuple():
263263
tm.assert_frame_equal(result, expected)
264264

265265

266+
@pytest.mark.parametrize('df', [
267+
DataFrame(),
268+
DataFrame(columns=[]),
269+
DataFrame(columns=['A', 'B', 'C']),
270+
DataFrame(index=[]),
271+
DataFrame(index=['A', 'B', 'C'])
272+
])
273+
def test_drop_duplicates_empty(df):
274+
# GH 20516
275+
result = df.drop_duplicates()
276+
tm.assert_frame_equal(result, df)
277+
278+
result = df.copy()
279+
result.drop_duplicates(inplace=True)
280+
tm.assert_frame_equal(result, df)
281+
282+
266283
def test_drop_duplicates_NA():
267284
# none
268285
df = DataFrame({'A': [None, None, 'foo', 'bar',

0 commit comments

Comments
 (0)