Skip to content

Commit e3f1cf1

Browse files
authored
REF: collect DataFrame.drop, Series.drop tests (#32994)
1 parent e5650c8 commit e3f1cf1

File tree

5 files changed

+294
-288
lines changed

5 files changed

+294
-288
lines changed

pandas/tests/frame/methods/test_drop.py

+206
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1+
import re
2+
13
import numpy as np
24
import pytest
35

6+
from pandas.errors import PerformanceWarning
7+
48
import pandas as pd
9+
from pandas import DataFrame, Index, MultiIndex
510
import pandas._testing as tm
611

712

@@ -52,3 +57,204 @@ def test_drop_with_non_unique_datetime_index_and_invalid_keys():
5257

5358
with pytest.raises(KeyError, match="not found in axis"):
5459
df.drop(["a", "b"]) # Dropping with labels not exist in the index
60+
61+
62+
class TestDataFrameDrop:
63+
def test_drop_names(self):
    """Check that ``DataFrame.drop`` keeps axis names and honours ``errors``.

    Covers dropping a row label and a column label (both as a copy and
    in place), the ``KeyError`` raised for a missing label on either axis,
    and ``errors="ignore"`` with missing, partially missing and empty
    label lists.
    """
    df = DataFrame(
        [[1, 2, 3], [3, 4, 5], [5, 6, 7]],
        index=["a", "b", "c"],
        columns=["d", "e", "f"],
    )
    df.index.name, df.columns.name = "first", "second"
    df_dropped_b = df.drop("b")
    df_dropped_e = df.drop("e", axis=1)
    df_inplace_b, df_inplace_e = df.copy(), df.copy()
    df_inplace_b.drop("b", inplace=True)
    df_inplace_e.drop("e", axis=1, inplace=True)
    for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e):
        assert obj.index.name == "first"
        assert obj.columns.name == "second"
    # The non-inplace drops above must not have modified the original frame.
    assert list(df.columns) == ["d", "e", "f"]

    msg = r"\['g'\] not found in axis"
    with pytest.raises(KeyError, match=msg):
        df.drop(["g"])
    with pytest.raises(KeyError, match=msg):
        # ``axis`` is passed by keyword: the positional form was deprecated
        # and later removed from ``DataFrame.drop``.
        df.drop(["g"], axis=1)

    # errors = 'ignore'
    dropped = df.drop(["g"], errors="ignore")
    expected = Index(["a", "b", "c"], name="first")
    tm.assert_index_equal(dropped.index, expected)

    dropped = df.drop(["b", "g"], errors="ignore")
    expected = Index(["a", "c"], name="first")
    tm.assert_index_equal(dropped.index, expected)

    dropped = df.drop(["g"], axis=1, errors="ignore")
    expected = Index(["d", "e", "f"], name="second")
    tm.assert_index_equal(dropped.columns, expected)

    dropped = df.drop(["d", "g"], axis=1, errors="ignore")
    expected = Index(["e", "f"], name="second")
    tm.assert_index_equal(dropped.columns, expected)

    # GH 16398
    dropped = df.drop([], errors="ignore")
    expected = Index(["a", "b", "c"], name="first")
    tm.assert_index_equal(dropped.index, expected)
107+
108+
def test_drop(self):
    """Exercise ``DataFrame.drop`` on both axes.

    Covers single labels and lists, string axis aliases, missing labels
    (raising and with ``errors="ignore"``), duplicate column and index
    labels, and an in-place drop driven by a boolean selection (GH#5628).
    """
    simple = DataFrame({"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]})
    tm.assert_frame_equal(simple.drop("A", axis=1), simple[["B"]])
    tm.assert_frame_equal(simple.drop(["A", "B"], axis="columns"), simple[[]])
    tm.assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.loc[[2], :])
    tm.assert_frame_equal(simple.drop([0, 3], axis="index"), simple.loc[[1, 2], :])

    # ``axis`` is always passed by keyword: the positional form was
    # deprecated and later removed from ``DataFrame.drop``.
    with pytest.raises(KeyError, match=r"\[5\] not found in axis"):
        simple.drop(5)
    with pytest.raises(KeyError, match=r"\['C'\] not found in axis"):
        simple.drop("C", axis=1)
    with pytest.raises(KeyError, match=r"\[5\] not found in axis"):
        simple.drop([1, 5])
    with pytest.raises(KeyError, match=r"\['C'\] not found in axis"):
        simple.drop(["A", "C"], axis=1)

    # errors = 'ignore'
    tm.assert_frame_equal(simple.drop(5, errors="ignore"), simple)
    tm.assert_frame_equal(
        simple.drop([0, 5], errors="ignore"), simple.loc[[1, 2, 3], :]
    )
    tm.assert_frame_equal(simple.drop("C", axis=1, errors="ignore"), simple)
    tm.assert_frame_equal(
        simple.drop(["A", "C"], axis=1, errors="ignore"), simple[["B"]]
    )

    # non-unique - wheee!
    # zip() stops at the shortest input, so the four-element range(-3, 1)
    # only contributes its first three values.
    nu_df = DataFrame(
        list(zip(range(3), range(-3, 1), list("abc"))), columns=["a", "a", "b"]
    )
    tm.assert_frame_equal(nu_df.drop("a", axis=1), nu_df[["b"]])
    tm.assert_frame_equal(nu_df.drop("b", axis="columns"), nu_df["a"])
    tm.assert_frame_equal(nu_df.drop([]), nu_df)  # GH 16398

    nu_df = nu_df.set_index(Index(["X", "Y", "X"]))
    nu_df.columns = list("abc")
    tm.assert_frame_equal(nu_df.drop("X", axis="rows"), nu_df.loc[["Y"], :])
    tm.assert_frame_equal(nu_df.drop(["X", "Y"], axis=0), nu_df.loc[[], :])

    # inplace cache issue
    # GH#5628
    df = DataFrame(np.random.randn(10, 3), columns=list("abc"))
    # Build the expectation before the in-place drop mutates ``df``.
    expected = df[~(df.b > 0)]
    df.drop(labels=df[df.b > 0].index, inplace=True)
    tm.assert_frame_equal(df, expected)
153+
154+
def test_drop_multiindex_not_lexsorted(self):
    """Dropping from non-lexsorted MultiIndex columns warns but still works.

    Two frames that compare equal are built, one whose columns report
    ``is_lexsorted()`` and one whose columns do not. Dropping column "a"
    from the latter must emit ``PerformanceWarning`` and give the same
    result as dropping it from the former.
    """
    # GH#11640
    # NOTE(review): ``MultiIndex.is_lexsorted`` may not be public in newer
    # pandas releases -- confirm against the version this suite targets.

    # Reference frame: columns created straight from tuples.
    sorted_columns = MultiIndex.from_tuples(
        [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"]
    )
    lexsorted_df = DataFrame([[1, 3, 4]], columns=sorted_columns)
    assert lexsorted_df.columns.is_lexsorted()

    # Same content, reached through pivot_table + reset_index, which leaves
    # the column MultiIndex not lexsorted (asserted below).
    long_form = DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    pivoted = long_form.pivot_table(index="a", columns=["b", "c"], values="d")
    not_lexsorted_df = pivoted.reset_index()
    assert not not_lexsorted_df.columns.is_lexsorted()

    # Both constructions must describe the same frame.
    tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)

    with tm.assert_produces_warning(PerformanceWarning):
        result = not_lexsorted_df.drop("a", axis=1)
    expected = lexsorted_df.drop("a", axis=1)

    tm.assert_frame_equal(result, expected)
182+
183+
def test_drop_api_equivalence(self):
    """Check the ``labels``/``axis`` and ``index``/``columns`` spellings agree.

    Each pair of calls below must produce equal frames. Mixing ``labels``
    with ``index`` or ``columns``, or giving none of the three, must raise
    ``ValueError``.
    """
    # equivalence of the labels/axis and index/columns API's (GH#12392)
    df = DataFrame(
        [[1, 2, 3], [3, 4, 5], [5, 6, 7]],
        index=["a", "b", "c"],
        columns=["d", "e", "f"],
    )

    res1 = df.drop("a")
    res2 = df.drop(index="a")
    tm.assert_frame_equal(res1, res2)

    # ``axis`` is passed by keyword: the positional form was deprecated and
    # later removed from ``DataFrame.drop``.
    res1 = df.drop("d", axis=1)
    res2 = df.drop(columns="d")
    tm.assert_frame_equal(res1, res2)

    res1 = df.drop(labels="e", axis=1)
    res2 = df.drop(columns="e")
    tm.assert_frame_equal(res1, res2)

    res1 = df.drop(["a"], axis=0)
    res2 = df.drop(index=["a"])
    tm.assert_frame_equal(res1, res2)

    res1 = df.drop(["a"], axis=0).drop(["d"], axis=1)
    res2 = df.drop(index=["a"], columns=["d"])
    tm.assert_frame_equal(res1, res2)

    msg = "Cannot specify both 'labels' and 'index'/'columns'"
    with pytest.raises(ValueError, match=msg):
        df.drop(labels="a", index="b")

    with pytest.raises(ValueError, match=msg):
        df.drop(labels="a", columns="b")

    msg = "Need to specify at least one of 'labels', 'index' or 'columns'"
    with pytest.raises(ValueError, match=msg):
        df.drop(axis=1)
221+
222+
# Shared 2x3 payload for the parametrized frames below.
data = [[1, 2, 3], [1, 2, 3]]

@pytest.mark.parametrize(
    "actual",
    [
        DataFrame(data=data, index=["a", "a"]),
        DataFrame(data=data, index=["a", "b"]),
        DataFrame(data=data, index=["a", "b"]).set_index([0, 1]),
        DataFrame(data=data, index=["a", "a"]).set_index([0, 1]),
    ],
)
def test_raise_on_drop_duplicate_index(self, actual):
    """Dropping a missing label raises ``KeyError`` unless errors are ignored.

    Runs against each parametrized frame, on the rows of the frame and on
    the columns of its transpose. ``level=0`` is passed only when the
    frame's index is a ``MultiIndex``.
    """
    # GH#19186
    if isinstance(actual.index, MultiIndex):
        drop_level = 0
    else:
        drop_level = None
    pattern = re.escape("\"['c'] not found in axis\"")
    transposed = actual.T

    # Default error handling: the missing label must raise on either axis.
    with pytest.raises(KeyError, match=pattern):
        actual.drop("c", level=drop_level, axis=0)
    with pytest.raises(KeyError, match=pattern):
        transposed.drop("c", level=drop_level, axis=1)

    # errors="ignore": the frame must come back unchanged.
    rows_untouched = actual.drop("c", axis=0, level=drop_level, errors="ignore")
    tm.assert_frame_equal(rows_untouched, actual)
    cols_untouched = actual.T.drop(
        "c", axis=1, level=drop_level, errors="ignore"
    )
    tm.assert_frame_equal(cols_untouched.T, actual)
246+
247+
@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 2]])
@pytest.mark.parametrize("drop_labels", [[], [1], [2]])
def test_drop_empty_list(self, index, drop_labels):
    """Dropping present labels (or none) leaves exactly the other labels.

    The expected index keeps every entry of ``index`` that is not listed
    in ``drop_labels``, so all occurrences of a dropped label are removed.
    """
    # GH#21494
    remaining = [label for label in index if label not in drop_labels]
    expected = pd.DataFrame(index=remaining)
    result = pd.DataFrame(index=index).drop(drop_labels)
    tm.assert_frame_equal(result, expected)
254+
255+
@pytest.mark.parametrize("index", [[1, 2, 3], [1, 2, 2]])
@pytest.mark.parametrize("drop_labels", [[1, 4], [4, 5]])
def test_drop_non_empty_list(self, index, drop_labels):
    """Dropping a list containing a label absent from the index raises.

    Every parametrized ``drop_labels`` includes at least one value that is
    not in ``index``, so ``KeyError`` is expected each time.
    """
    # GH# 21494
    frame = pd.DataFrame(index=index)
    with pytest.raises(KeyError, match="not found in axis"):
        frame.drop(drop_labels)

0 commit comments

Comments
 (0)