Skip to content

Commit aa71c21

Browse files
authored
REF: .drop tests (#33156)
1 parent 51f114b commit aa71c21

File tree

4 files changed

+188
-177
lines changed

4 files changed

+188
-177
lines changed

pandas/tests/frame/methods/test_drop.py

+160-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pandas.errors import PerformanceWarning
77

88
import pandas as pd
9-
from pandas import DataFrame, Index, MultiIndex
9+
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
1010
import pandas._testing as tm
1111

1212

@@ -258,3 +258,162 @@ def test_drop_non_empty_list(self, index, drop_labels):
258258
# GH# 21494
259259
with pytest.raises(KeyError, match="not found in axis"):
260260
pd.DataFrame(index=index).drop(drop_labels)
261+
262+
def test_mixed_depth_drop(self):
263+
arrays = [
264+
["a", "top", "top", "routine1", "routine1", "routine2"],
265+
["", "OD", "OD", "result1", "result2", "result1"],
266+
["", "wx", "wy", "", "", ""],
267+
]
268+
269+
tuples = sorted(zip(*arrays))
270+
index = MultiIndex.from_tuples(tuples)
271+
df = DataFrame(np.random.randn(4, 6), columns=index)
272+
273+
result = df.drop("a", axis=1)
274+
expected = df.drop([("a", "", "")], axis=1)
275+
tm.assert_frame_equal(expected, result)
276+
277+
result = df.drop(["top"], axis=1)
278+
expected = df.drop([("top", "OD", "wx")], axis=1)
279+
expected = expected.drop([("top", "OD", "wy")], axis=1)
280+
tm.assert_frame_equal(expected, result)
281+
282+
result = df.drop(("top", "OD", "wx"), axis=1)
283+
expected = df.drop([("top", "OD", "wx")], axis=1)
284+
tm.assert_frame_equal(expected, result)
285+
286+
expected = df.drop([("top", "OD", "wy")], axis=1)
287+
expected = df.drop("top", axis=1)
288+
289+
result = df.drop("result1", level=1, axis=1)
290+
expected = df.drop(
291+
[("routine1", "result1", ""), ("routine2", "result1", "")], axis=1
292+
)
293+
tm.assert_frame_equal(expected, result)
294+
295+
def test_drop_multiindex_other_level_nan(self):
296+
# GH#12754
297+
df = (
298+
DataFrame(
299+
{
300+
"A": ["one", "one", "two", "two"],
301+
"B": [np.nan, 0.0, 1.0, 2.0],
302+
"C": ["a", "b", "c", "c"],
303+
"D": [1, 2, 3, 4],
304+
}
305+
)
306+
.set_index(["A", "B", "C"])
307+
.sort_index()
308+
)
309+
result = df.drop("c", level="C")
310+
expected = DataFrame(
311+
[2, 1],
312+
columns=["D"],
313+
index=pd.MultiIndex.from_tuples(
314+
[("one", 0.0, "b"), ("one", np.nan, "a")], names=["A", "B", "C"]
315+
),
316+
)
317+
tm.assert_frame_equal(result, expected)
318+
319+
def test_drop_nonunique(self):
320+
df = DataFrame(
321+
[
322+
["x-a", "x", "a", 1.5],
323+
["x-a", "x", "a", 1.2],
324+
["z-c", "z", "c", 3.1],
325+
["x-a", "x", "a", 4.1],
326+
["x-b", "x", "b", 5.1],
327+
["x-b", "x", "b", 4.1],
328+
["x-b", "x", "b", 2.2],
329+
["y-a", "y", "a", 1.2],
330+
["z-b", "z", "b", 2.1],
331+
],
332+
columns=["var1", "var2", "var3", "var4"],
333+
)
334+
335+
grp_size = df.groupby("var1").size()
336+
drop_idx = grp_size.loc[grp_size == 1]
337+
338+
idf = df.set_index(["var1", "var2", "var3"])
339+
340+
# it works! GH#2101
341+
result = idf.drop(drop_idx.index, level=0).reset_index()
342+
expected = df[-df.var1.isin(drop_idx.index)]
343+
344+
result.index = expected.index
345+
346+
tm.assert_frame_equal(result, expected)
347+
348+
def test_drop_level(self):
349+
index = MultiIndex(
350+
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
351+
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
352+
names=["first", "second"],
353+
)
354+
frame = DataFrame(
355+
np.random.randn(10, 3),
356+
index=index,
357+
columns=Index(["A", "B", "C"], name="exp"),
358+
)
359+
360+
result = frame.drop(["bar", "qux"], level="first")
361+
expected = frame.iloc[[0, 1, 2, 5, 6]]
362+
tm.assert_frame_equal(result, expected)
363+
364+
result = frame.drop(["two"], level="second")
365+
expected = frame.iloc[[0, 2, 3, 6, 7, 9]]
366+
tm.assert_frame_equal(result, expected)
367+
368+
result = frame.T.drop(["bar", "qux"], axis=1, level="first")
369+
expected = frame.iloc[[0, 1, 2, 5, 6]].T
370+
tm.assert_frame_equal(result, expected)
371+
372+
result = frame.T.drop(["two"], axis=1, level="second")
373+
expected = frame.iloc[[0, 2, 3, 6, 7, 9]].T
374+
tm.assert_frame_equal(result, expected)
375+
376+
def test_drop_level_nonunique_datetime(self):
377+
# GH#12701
378+
idx = Index([2, 3, 4, 4, 5], name="id")
379+
idxdt = pd.to_datetime(
380+
[
381+
"201603231400",
382+
"201603231500",
383+
"201603231600",
384+
"201603231600",
385+
"201603231700",
386+
]
387+
)
388+
df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx)
389+
df["tstamp"] = idxdt
390+
df = df.set_index("tstamp", append=True)
391+
ts = Timestamp("201603231600")
392+
assert df.index.is_unique is False
393+
394+
result = df.drop(ts, level="tstamp")
395+
expected = df.loc[idx != 4]
396+
tm.assert_frame_equal(result, expected)
397+
398+
@pytest.mark.parametrize("box", [Series, DataFrame])
def test_drop_tz_aware_timestamp_across_dst(self, box):
    """Drop the first label of a tz-aware 15-minute index for Series/DataFrame."""
    # GH#21761
    zone = "Europe/Berlin"
    start = Timestamp("2017-10-29", tz=zone)
    end = Timestamp("2017-10-29 04:00:00", tz=zone)

    index = pd.date_range(start, end, freq="15min")
    data = box(data=[1] * len(index), index=index)

    # the expected container starts one 15-minute step after ``start``
    expected_start = Timestamp("2017-10-29 00:15:00", tz=zone)
    expected_idx = pd.date_range(expected_start, end, freq="15min")
    expected = box(data=[1] * len(expected_idx), index=expected_idx)

    result = data.drop(start)
    tm.assert_equal(result, expected)
410+
411+
def test_drop_preserve_names(self):
412+
index = MultiIndex.from_arrays(
413+
[[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"]
414+
)
415+
416+
df = DataFrame(np.random.randn(6, 3), index=index)
417+
418+
result = df.drop([(0, 2)])
419+
assert result.index.names == ("one", "two")

pandas/tests/indexes/multi/test_indexing.py

+28
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,34 @@ def test_contains_with_missing_value(self):
500500
assert np.nan not in idx
501501
assert (1, np.nan) in idx
502502

503+
def test_multiindex_contains_dropped(self):
504+
# GH#19027
505+
# test that dropped MultiIndex levels are not in the MultiIndex
506+
# despite continuing to be in the MultiIndex's levels
507+
idx = MultiIndex.from_product([[1, 2], [3, 4]])
508+
assert 2 in idx
509+
idx = idx.drop(2)
510+
511+
# drop implementation keeps 2 in the levels
512+
assert 2 in idx.levels[0]
513+
# but it should no longer be in the index itself
514+
assert 2 not in idx
515+
516+
# also applies to strings
517+
idx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
518+
assert "a" in idx
519+
idx = idx.drop("a")
520+
assert "a" in idx.levels[0]
521+
assert "a" not in idx
522+
523+
def test_contains_td64_level(self):
524+
# GH#24570
525+
tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min")
526+
idx = MultiIndex.from_arrays([tx, np.arange(len(tx))])
527+
assert tx[0] in idx
528+
assert "element_not_exit" not in idx
529+
assert "0 day 09:30:00" in idx
530+
503531

504532
def test_timestamp_multiindex_indexer():
505533
# https://github.com/pandas-dev/pandas/issues/26944

pandas/tests/indexing/multiindex/test_multiindex.py

-28
Original file line numberDiff line numberDiff line change
@@ -26,26 +26,6 @@ def test_multiindex_perf_warn(self):
2626
with tm.assert_produces_warning(PerformanceWarning):
2727
df.loc[(0,)]
2828

29-
def test_multiindex_contains_dropped(self):
30-
# GH 19027
31-
# test that dropped MultiIndex levels are not in the MultiIndex
32-
# despite continuing to be in the MultiIndex's levels
33-
idx = MultiIndex.from_product([[1, 2], [3, 4]])
34-
assert 2 in idx
35-
idx = idx.drop(2)
36-
37-
# drop implementation keeps 2 in the levels
38-
assert 2 in idx.levels[0]
39-
# but it should no longer be in the index itself
40-
assert 2 not in idx
41-
42-
# also applies to strings
43-
idx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
44-
assert "a" in idx
45-
idx = idx.drop("a")
46-
assert "a" in idx.levels[0]
47-
assert "a" not in idx
48-
4929
def test_indexing_over_hashtable_size_cutoff(self):
5030
n = 10000
5131

@@ -85,14 +65,6 @@ def test_multi_nan_indexing(self):
8565
)
8666
tm.assert_frame_equal(result, expected)
8767

88-
def test_contains(self):
89-
# GH 24570
90-
tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min")
91-
idx = MultiIndex.from_arrays([tx, np.arange(len(tx))])
92-
assert tx[0] in idx
93-
assert "element_not_exit" not in idx
94-
assert "0 day 09:30:00" in idx
95-
9668
def test_nested_tuples_duplicates(self):
9769
# GH#30892
9870

0 commit comments

Comments
 (0)