Skip to content

REF: collect .drop tests #33156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 160 additions & 1 deletion pandas/tests/frame/methods/test_drop.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pandas.errors import PerformanceWarning

import pandas as pd
from pandas import DataFrame, Index, MultiIndex
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
import pandas._testing as tm


Expand Down Expand Up @@ -258,3 +258,162 @@ def test_drop_non_empty_list(self, index, drop_labels):
# GH# 21494
with pytest.raises(KeyError, match="not found in axis"):
pd.DataFrame(index=index).drop(drop_labels)

def test_mixed_depth_drop(self):
arrays = [
["a", "top", "top", "routine1", "routine1", "routine2"],
["", "OD", "OD", "result1", "result2", "result1"],
["", "wx", "wy", "", "", ""],
]

tuples = sorted(zip(*arrays))
index = MultiIndex.from_tuples(tuples)
df = DataFrame(np.random.randn(4, 6), columns=index)

result = df.drop("a", axis=1)
expected = df.drop([("a", "", "")], axis=1)
tm.assert_frame_equal(expected, result)

result = df.drop(["top"], axis=1)
expected = df.drop([("top", "OD", "wx")], axis=1)
expected = expected.drop([("top", "OD", "wy")], axis=1)
tm.assert_frame_equal(expected, result)

result = df.drop(("top", "OD", "wx"), axis=1)
expected = df.drop([("top", "OD", "wx")], axis=1)
tm.assert_frame_equal(expected, result)

expected = df.drop([("top", "OD", "wy")], axis=1)
expected = df.drop("top", axis=1)

result = df.drop("result1", level=1, axis=1)
expected = df.drop(
[("routine1", "result1", ""), ("routine2", "result1", "")], axis=1
)
tm.assert_frame_equal(expected, result)

def test_drop_multiindex_other_level_nan(self):
# GH#12754
df = (
DataFrame(
{
"A": ["one", "one", "two", "two"],
"B": [np.nan, 0.0, 1.0, 2.0],
"C": ["a", "b", "c", "c"],
"D": [1, 2, 3, 4],
}
)
.set_index(["A", "B", "C"])
.sort_index()
)
result = df.drop("c", level="C")
expected = DataFrame(
[2, 1],
columns=["D"],
index=pd.MultiIndex.from_tuples(
[("one", 0.0, "b"), ("one", np.nan, "a")], names=["A", "B", "C"]
),
)
tm.assert_frame_equal(result, expected)

def test_drop_nonunique(self):
df = DataFrame(
[
["x-a", "x", "a", 1.5],
["x-a", "x", "a", 1.2],
["z-c", "z", "c", 3.1],
["x-a", "x", "a", 4.1],
["x-b", "x", "b", 5.1],
["x-b", "x", "b", 4.1],
["x-b", "x", "b", 2.2],
["y-a", "y", "a", 1.2],
["z-b", "z", "b", 2.1],
],
columns=["var1", "var2", "var3", "var4"],
)

grp_size = df.groupby("var1").size()
drop_idx = grp_size.loc[grp_size == 1]

idf = df.set_index(["var1", "var2", "var3"])

# it works! GH#2101
result = idf.drop(drop_idx.index, level=0).reset_index()
expected = df[-df.var1.isin(drop_idx.index)]

result.index = expected.index

tm.assert_frame_equal(result, expected)

def test_drop_level(self):
index = MultiIndex(
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
frame = DataFrame(
np.random.randn(10, 3),
index=index,
columns=Index(["A", "B", "C"], name="exp"),
)

result = frame.drop(["bar", "qux"], level="first")
expected = frame.iloc[[0, 1, 2, 5, 6]]
tm.assert_frame_equal(result, expected)

result = frame.drop(["two"], level="second")
expected = frame.iloc[[0, 2, 3, 6, 7, 9]]
tm.assert_frame_equal(result, expected)

result = frame.T.drop(["bar", "qux"], axis=1, level="first")
expected = frame.iloc[[0, 1, 2, 5, 6]].T
tm.assert_frame_equal(result, expected)

result = frame.T.drop(["two"], axis=1, level="second")
expected = frame.iloc[[0, 2, 3, 6, 7, 9]].T
tm.assert_frame_equal(result, expected)

def test_drop_level_nonunique_datetime(self):
# GH#12701
idx = Index([2, 3, 4, 4, 5], name="id")
idxdt = pd.to_datetime(
[
"201603231400",
"201603231500",
"201603231600",
"201603231600",
"201603231700",
]
)
df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx)
df["tstamp"] = idxdt
df = df.set_index("tstamp", append=True)
ts = Timestamp("201603231600")
assert df.index.is_unique is False

result = df.drop(ts, level="tstamp")
expected = df.loc[idx != 4]
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("box", [Series, DataFrame])
def test_drop_tz_aware_timestamp_across_dst(self, box):
    # GH#21761
    tz = "Europe/Berlin"
    start = Timestamp("2017-10-29", tz=tz)
    end = Timestamp("2017-10-29 04:00:00", tz=tz)

    # 15-minute grid from start to end, wrapped in the parametrized container
    full_index = pd.date_range(start, end, freq="15min")
    data = box(data=[1] * len(full_index), index=full_index)

    # expected: the same grid beginning one step later, i.e. without ``start``
    second_stamp = Timestamp("2017-10-29 00:15:00", tz=tz)
    remaining_index = pd.date_range(second_stamp, end, freq="15min")
    expected = box(data=[1] * len(remaining_index), index=remaining_index)

    result = data.drop(start)
    tm.assert_equal(result, expected)

def test_drop_preserve_names(self):
index = MultiIndex.from_arrays(
[[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"]
)

df = DataFrame(np.random.randn(6, 3), index=index)

result = df.drop([(0, 2)])
assert result.index.names == ("one", "two")
28 changes: 28 additions & 0 deletions pandas/tests/indexes/multi/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,34 @@ def test_contains_with_missing_value(self):
assert np.nan not in idx
assert (1, np.nan) in idx

def test_multiindex_contains_dropped(self):
# GH#19027
# test that dropped MultiIndex levels are not in the MultiIndex
# despite continuing to be in the MultiIndex's levels
idx = MultiIndex.from_product([[1, 2], [3, 4]])
assert 2 in idx
idx = idx.drop(2)

# drop implementation keeps 2 in the levels
assert 2 in idx.levels[0]
# but it should no longer be in the index itself
assert 2 not in idx

# also applies to strings
idx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
assert "a" in idx
idx = idx.drop("a")
assert "a" in idx.levels[0]
assert "a" not in idx

def test_contains_td64_level(self):
# GH#24570
tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min")
idx = MultiIndex.from_arrays([tx, np.arange(len(tx))])
assert tx[0] in idx
assert "element_not_exit" not in idx
assert "0 day 09:30:00" in idx


def test_timestamp_multiindex_indexer():
# https://github.com/pandas-dev/pandas/issues/26944
Expand Down
28 changes: 0 additions & 28 deletions pandas/tests/indexing/multiindex/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,6 @@ def test_multiindex_perf_warn(self):
with tm.assert_produces_warning(PerformanceWarning):
df.loc[(0,)]

def test_multiindex_contains_dropped(self):
# GH 19027
# test that dropped MultiIndex levels are not in the MultiIndex
# despite continuing to be in the MultiIndex's levels
idx = MultiIndex.from_product([[1, 2], [3, 4]])
assert 2 in idx
idx = idx.drop(2)

# drop implementation keeps 2 in the levels
assert 2 in idx.levels[0]
# but it should no longer be in the index itself
assert 2 not in idx

# also applies to strings
idx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
assert "a" in idx
idx = idx.drop("a")
assert "a" in idx.levels[0]
assert "a" not in idx

def test_indexing_over_hashtable_size_cutoff(self):
n = 10000

Expand Down Expand Up @@ -85,14 +65,6 @@ def test_multi_nan_indexing(self):
)
tm.assert_frame_equal(result, expected)

def test_contains(self):
# GH 24570
tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min")
idx = MultiIndex.from_arrays([tx, np.arange(len(tx))])
assert tx[0] in idx
assert "element_not_exit" not in idx
assert "0 day 09:30:00" in idx

def test_nested_tuples_duplicates(self):
# GH#30892

Expand Down
Loading