Skip to content

CLN: Remove redundant tests for .duplicated and .drop_duplicates in tests/base #32487

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

102 changes: 0 additions & 102 deletions pandas/tests/base/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,108 +594,6 @@ def test_factorize_repeated(self):
expected = o[5:10].append(o[:5])
tm.assert_index_equal(uniques, expected, check_names=False)

def test_duplicated_drop_duplicates_index(self):
# GH 4060
for original in self.objs:
if isinstance(original, Index):

# special case
if original.is_boolean():
result = original.drop_duplicates()
expected = Index([False, True], name="a")
tm.assert_index_equal(result, expected)
continue

# original doesn't have duplicates
expected = np.array([False] * len(original), dtype=bool)
duplicated = original.duplicated()
tm.assert_numpy_array_equal(duplicated, expected)
assert duplicated.dtype == bool
result = original.drop_duplicates()
tm.assert_index_equal(result, original)
assert result is not original
Comment on lines -609 to -616
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now tested in test_drop_duplicates_no_duplicates in tests/indexes/test_common.py, see below


# has_duplicates
assert not original.has_duplicates

# create repeated values: the values at positions 3 and 5 (0-based) are duplicated
idx = original[list(range(len(original))) + [5, 3]]
expected = np.array([False] * len(original) + [True, True], dtype=bool)
duplicated = idx.duplicated()
tm.assert_numpy_array_equal(duplicated, expected)
assert duplicated.dtype == bool
tm.assert_index_equal(idx.drop_duplicates(), original)

base = [False] * len(idx)
base[3] = True
base[5] = True
expected = np.array(base)

duplicated = idx.duplicated(keep="last")
tm.assert_numpy_array_equal(duplicated, expected)
assert duplicated.dtype == bool
result = idx.drop_duplicates(keep="last")
tm.assert_index_equal(result, idx[~expected])

base = [False] * len(original) + [True, True]
base[3] = True
base[5] = True
expected = np.array(base)

duplicated = idx.duplicated(keep=False)
tm.assert_numpy_array_equal(duplicated, expected)
assert duplicated.dtype == bool
result = idx.drop_duplicates(keep=False)
tm.assert_index_equal(result, idx[~expected])
Comment on lines -618 to -649
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tested in test_drop_duplicates in tests/indexes/test_common.py, which was extended and refactored; see below


with pytest.raises(
TypeError,
match=r"drop_duplicates\(\) got an unexpected keyword argument",
):
idx.drop_duplicates(inplace=True)
Comment on lines -651 to -655
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now tested in test_drop_duplicates_inplace in tests/indexes/test_common.py, see below


else:
expected = Series(
[False] * len(original), index=original.index, name="a"
)
tm.assert_series_equal(original.duplicated(), expected)
result = original.drop_duplicates()
tm.assert_series_equal(result, original)
assert result is not original
Comment on lines -658 to -664
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now tested in test_drop_duplicates_no_duplicates in tests/series/methods/test_drop_duplicates.py, see below


idx = original.index[list(range(len(original))) + [5, 3]]
values = original._values[list(range(len(original))) + [5, 3]]
s = Series(values, index=idx, name="a")

expected = Series(
[False] * len(original) + [True, True], index=idx, name="a"
)
tm.assert_series_equal(s.duplicated(), expected)
tm.assert_series_equal(s.drop_duplicates(), original)

base = [False] * len(idx)
base[3] = True
base[5] = True
expected = Series(base, index=idx, name="a")

tm.assert_series_equal(s.duplicated(keep="last"), expected)
tm.assert_series_equal(
s.drop_duplicates(keep="last"), s[~np.array(base)]
)

base = [False] * len(original) + [True, True]
base[3] = True
base[5] = True
expected = Series(base, index=idx, name="a")

tm.assert_series_equal(s.duplicated(keep=False), expected)
tm.assert_series_equal(
s.drop_duplicates(keep=False), s[~np.array(base)]
)

s.drop_duplicates(inplace=True)
tm.assert_series_equal(s, original)
Comment on lines -666 to -697
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


def test_drop_duplicates_series_vs_dataframe(self):
# GH 14192
df = pd.DataFrame(
Expand Down