Skip to content

Commit 9c49e64

Browse files
jamestran201 authored and jreback committed
TST: #15752 Add test_drop_duplicates for Categorical dtypes (#18072)
1 parent 1181622 commit 9c49e64

File tree

1 file changed

+91
-0
lines changed

1 file changed

+91
-0
lines changed

pandas/tests/test_categorical.py

+91
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,97 @@ def test_set_categories_inplace(self):
797797
cat.set_categories(['a', 'b', 'c', 'd'], inplace=True)
798798
tm.assert_index_equal(cat.categories, pd.Index(['a', 'b', 'c', 'd']))
799799

800+
@pytest.mark.parametrize(
    "dtype",
    [
        "int_", "uint", "float_", "unicode_", "timedelta64[h]",
        pytest.param("datetime64[D]",
                     marks=pytest.mark.xfail(reason="issue7996")),
    ]
)
@pytest.mark.parametrize("is_ordered", [True, False])
def test_drop_duplicates_non_bool(self, dtype, is_ordered):
    """Check ``duplicated``/``drop_duplicates`` on non-bool categoricals.

    For every parametrized ``dtype`` and ``is_ordered`` flag, two
    categorical Series are built over the categories ``[1, 2, 3, 4, 5]``
    (cast to ``dtype``).  Each one is checked with the default ``keep``,
    with ``keep='last'`` and with ``keep=False``:

    * ``duplicated`` must return the expected boolean mask,
    * ``drop_duplicates`` must equal the Series indexed by the negated
      mask,
    * ``drop_duplicates(inplace=True)`` on a copy must give that same
      result.

    The ``datetime64[D]`` case is marked ``xfail`` (reason "issue7996").
    """
    np_dtype = np.dtype(dtype)
    categories = np.array([1, 2, 3, 4, 5], dtype=np_dtype)

    # Each scenario: raw input values, then (keep-kwargs, expected
    # ``duplicated`` mask) pairs.  An empty kwargs dict exercises the
    # default ``keep`` argument rather than passing it explicitly.
    scenarios = [
        # Test case 1
        (
            [1, 2, 3, 3],
            [
                ({}, [False, False, False, True]),
                ({'keep': 'last'}, [False, False, True, False]),
                ({'keep': False}, [False, False, True, True]),
            ],
        ),
        # Test case 2
        (
            [1, 2, 3, 5, 3, 2, 4],
            [
                ({}, [False, False, False, False, True, True, False]),
                ({'keep': 'last'},
                 [False, True, True, False, False, False, False]),
                ({'keep': False},
                 [False, True, True, False, True, True, False]),
            ],
        ),
    ]

    for raw_values, checks in scenarios:
        values = np.array(raw_values, dtype=np_dtype)
        ser = Series(Categorical(values, categories=categories,
                                 ordered=is_ordered))

        for keep_kwargs, flags in checks:
            mask = Series(flags)

            tm.assert_series_equal(ser.duplicated(**keep_kwargs), mask)
            tm.assert_series_equal(ser.drop_duplicates(**keep_kwargs),
                                   ser[~mask])

            # The in-place variant must agree with the returning one.
            in_place = ser.copy()
            in_place.drop_duplicates(inplace=True, **keep_kwargs)
            tm.assert_series_equal(in_place, ser[~mask])
864+
865+
@pytest.mark.parametrize("is_ordered", [True, False])
def test_drop_duplicates_bool(self, is_ordered):
    """Check ``duplicated``/``drop_duplicates`` on a boolean categorical.

    A Series wrapping ``Categorical([True, False, True, False])`` with
    categories ``[True, False]`` is checked with the default ``keep``,
    with ``keep='last'`` and with ``keep=False``.  In each case
    ``duplicated`` must return the expected mask, and both the returning
    and the in-place ``drop_duplicates`` must equal the Series indexed by
    the negated mask.
    """
    ser = Series(Categorical([True, False, True, False],
                             categories=[True, False],
                             ordered=is_ordered))

    # (keep-kwargs, expected ``duplicated`` mask); the empty dict
    # exercises the default ``keep`` argument.
    checks = [
        ({}, [False, False, True, True]),
        ({'keep': 'last'}, [True, True, False, False]),
        ({'keep': False}, [True, True, True, True]),
    ]

    for keep_kwargs, flags in checks:
        mask = Series(flags)

        tm.assert_series_equal(ser.duplicated(**keep_kwargs), mask)
        tm.assert_series_equal(ser.drop_duplicates(**keep_kwargs),
                               ser[~mask])

        # The in-place variant must agree with the returning one.
        in_place = ser.copy()
        in_place.drop_duplicates(inplace=True, **keep_kwargs)
        tm.assert_series_equal(in_place, ser[~mask])
890+
800891
def test_describe(self):
801892
# string type
802893
desc = self.factor.describe()

0 commit comments

Comments
 (0)