Skip to content

Commit 36d6583

Browse files
authored
REF: DataFrame delitem, take, pop, filter tests (#33109)
1 parent ff0f65a commit 36d6583

File tree

9 files changed

+374
-340
lines changed

9 files changed

+374
-340
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import re
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas import DataFrame, MultiIndex
7+
8+
9+
class TestDataFrameDelItem:
10+
def test_delitem(self, float_frame):
11+
del float_frame["A"]
12+
assert "A" not in float_frame
13+
14+
def test_delitem_multiindex(self):
15+
midx = MultiIndex.from_product([["A", "B"], [1, 2]])
16+
df = DataFrame(np.random.randn(4, 4), columns=midx)
17+
assert len(df.columns) == 4
18+
assert ("A",) in df.columns
19+
assert "A" in df.columns
20+
21+
result = df["A"]
22+
assert isinstance(result, DataFrame)
23+
del df["A"]
24+
25+
assert len(df.columns) == 2
26+
27+
# A still in the levels, BUT get a KeyError if trying
28+
# to delete
29+
assert ("A",) not in df.columns
30+
with pytest.raises(KeyError, match=re.escape("('A',)")):
31+
del df[("A",)]
32+
33+
# behavior of dropped/deleted MultiIndex levels changed from
34+
# GH 2770 to GH 19027: MultiIndex no longer '.__contains__'
35+
# levels which are dropped/deleted
36+
assert "A" not in df.columns
37+
with pytest.raises(KeyError, match=re.escape("('A',)")):
38+
del df["A"]
39+
40+
def test_delitem_corner(self, float_frame):
41+
f = float_frame.copy()
42+
del f["D"]
43+
assert len(f.columns) == 3
44+
with pytest.raises(KeyError, match=r"^'D'$"):
45+
del f["D"]
46+
del f["B"]
47+
assert len(f.columns) == 2
48+
49+
def test_delitem_col_still_multiindex(self):
50+
arrays = [["a", "b", "c", "top"], ["", "", "", "OD"], ["", "", "", "wx"]]
51+
52+
tuples = sorted(zip(*arrays))
53+
index = MultiIndex.from_tuples(tuples)
54+
55+
df = DataFrame(np.random.randn(3, 4), columns=index)
56+
del df[("a", "", "")]
57+
assert isinstance(df.columns, MultiIndex)

pandas/tests/frame/indexing/test_indexing.py

-9
Original file line numberDiff line numberDiff line change
@@ -854,15 +854,6 @@ def test_getitem_empty_frame_with_boolean(self):
854854
df2 = df[df > 0]
855855
tm.assert_frame_equal(df, df2)
856856

857-
def test_delitem_corner(self, float_frame):
858-
f = float_frame.copy()
859-
del f["D"]
860-
assert len(f.columns) == 3
861-
with pytest.raises(KeyError, match=r"^'D'$"):
862-
del f["D"]
863-
del f["B"]
864-
assert len(f.columns) == 2
865-
866857
def test_slice_floats(self):
867858
index = [52195.504153, 52196.303147, 52198.369883]
868859
df = DataFrame(np.random.rand(3, 2), index=index)
+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import pytest
2+
3+
import pandas._testing as tm
4+
5+
6+
class TestDataFrameTake:
7+
def test_take(self, float_frame):
8+
# homogeneous
9+
order = [3, 1, 2, 0]
10+
for df in [float_frame]:
11+
12+
result = df.take(order, axis=0)
13+
expected = df.reindex(df.index.take(order))
14+
tm.assert_frame_equal(result, expected)
15+
16+
# axis = 1
17+
result = df.take(order, axis=1)
18+
expected = df.loc[:, ["D", "B", "C", "A"]]
19+
tm.assert_frame_equal(result, expected, check_names=False)
20+
21+
# negative indices
22+
order = [2, 1, -1]
23+
for df in [float_frame]:
24+
25+
result = df.take(order, axis=0)
26+
expected = df.reindex(df.index.take(order))
27+
tm.assert_frame_equal(result, expected)
28+
29+
result = df.take(order, axis=0)
30+
tm.assert_frame_equal(result, expected)
31+
32+
# axis = 1
33+
result = df.take(order, axis=1)
34+
expected = df.loc[:, ["C", "B", "D"]]
35+
tm.assert_frame_equal(result, expected, check_names=False)
36+
37+
# illegal indices
38+
msg = "indices are out-of-bounds"
39+
with pytest.raises(IndexError, match=msg):
40+
df.take([3, 1, 2, 30], axis=0)
41+
with pytest.raises(IndexError, match=msg):
42+
df.take([3, 1, 2, -31], axis=0)
43+
with pytest.raises(IndexError, match=msg):
44+
df.take([3, 1, 2, 5], axis=1)
45+
with pytest.raises(IndexError, match=msg):
46+
df.take([3, 1, 2, -5], axis=1)
47+
48+
def test_take_mixed_type(self, float_string_frame):
49+
50+
# mixed-dtype
51+
order = [4, 1, 2, 0, 3]
52+
for df in [float_string_frame]:
53+
54+
result = df.take(order, axis=0)
55+
expected = df.reindex(df.index.take(order))
56+
tm.assert_frame_equal(result, expected)
57+
58+
# axis = 1
59+
result = df.take(order, axis=1)
60+
expected = df.loc[:, ["foo", "B", "C", "A", "D"]]
61+
tm.assert_frame_equal(result, expected)
62+
63+
# negative indices
64+
order = [4, 1, -2]
65+
for df in [float_string_frame]:
66+
67+
result = df.take(order, axis=0)
68+
expected = df.reindex(df.index.take(order))
69+
tm.assert_frame_equal(result, expected)
70+
71+
# axis = 1
72+
result = df.take(order, axis=1)
73+
expected = df.loc[:, ["foo", "B", "D"]]
74+
tm.assert_frame_equal(result, expected)
75+
76+
def test_take_mixed_numeric(self, mixed_float_frame, mixed_int_frame):
77+
# by dtype
78+
order = [1, 2, 0, 3]
79+
for df in [mixed_float_frame, mixed_int_frame]:
80+
81+
result = df.take(order, axis=0)
82+
expected = df.reindex(df.index.take(order))
83+
tm.assert_frame_equal(result, expected)
84+
85+
# axis = 1
86+
result = df.take(order, axis=1)
87+
expected = df.loc[:, ["B", "C", "A", "D"]]
88+
tm.assert_frame_equal(result, expected)
+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
from pandas import DataFrame
6+
import pandas._testing as tm
7+
8+
9+
class TestDataFrameFilter:
    """Tests for ``DataFrame.filter`` using ``items``, ``like`` and ``regex``."""

    def test_filter(self, float_frame, float_string_frame):
        """Cover item, ``like`` and ``regex`` selection plus argument validation."""
        # Items
        filtered = float_frame.filter(["A", "B", "E"])
        assert len(filtered.columns) == 2
        assert "E" not in filtered

        filtered = float_frame.filter(["A", "B", "E"], axis="columns")
        assert len(filtered.columns) == 2
        assert "E" not in filtered

        # Other axis
        idx = float_frame.index[0:4]
        filtered = float_frame.filter(idx, axis="index")
        expected = float_frame.reindex(index=idx)
        tm.assert_frame_equal(filtered, expected)

        # like
        fcopy = float_frame.copy()
        fcopy["AA"] = 1

        filtered = fcopy.filter(like="A")
        assert len(filtered.columns) == 2
        assert "AA" in filtered

        # like with ints in column names
        df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"])
        filtered = df.filter(like="_")
        assert len(filtered.columns) == 2

        # regex with ints in column names
        # from PR #10384
        df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"])
        expected = DataFrame(
            0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)
        )
        filtered = df.filter(regex="^[0-9]+$")
        tm.assert_frame_equal(filtered, expected)

        expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"])
        # shouldn't remove anything
        filtered = expected.filter(regex="^[0-9]+$")
        tm.assert_frame_equal(filtered, expected)

        # pass in None
        with pytest.raises(TypeError, match="Must pass"):
            float_frame.filter()
        with pytest.raises(TypeError, match="Must pass"):
            float_frame.filter(items=None)
        with pytest.raises(TypeError, match="Must pass"):
            float_frame.filter(axis=1)

        # test mutually exclusive arguments
        with pytest.raises(TypeError, match="mutually exclusive"):
            float_frame.filter(items=["one", "three"], regex="e$", like="bbi")
        with pytest.raises(TypeError, match="mutually exclusive"):
            float_frame.filter(items=["one", "three"], regex="e$", axis=1)
        with pytest.raises(TypeError, match="mutually exclusive"):
            float_frame.filter(items=["one", "three"], regex="e$")
        with pytest.raises(TypeError, match="mutually exclusive"):
            float_frame.filter(items=["one", "three"], like="bbi", axis=0)
        with pytest.raises(TypeError, match="mutually exclusive"):
            float_frame.filter(items=["one", "three"], like="bbi")

        # objects
        filtered = float_string_frame.filter(like="foo")
        assert "foo" in filtered

        # unicode columns, won't ascii-encode
        df = float_frame.rename(columns={"B": "\u2202"})
        filtered = df.filter(like="C")
        assert "C" in filtered

    def test_filter_regex_search(self, float_frame):
        """``regex`` matches anywhere in the label, not only at its start."""
        fcopy = float_frame.copy()
        fcopy["AA"] = 1

        # regex
        filtered = fcopy.filter(regex="[A]+")
        assert len(filtered.columns) == 2
        assert "AA" in filtered

        # doesn't have to be at beginning
        df = DataFrame(
            {"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]}
        )

        result = df.filter(regex="BB")
        exp = df[[x for x in df.columns if "BB" in x]]
        tm.assert_frame_equal(result, exp)

    # The "a" case used to be listed twice with identical values; one copy
    # is enough to cover it.
    @pytest.mark.parametrize(
        "name,expected",
        [
            ("a", DataFrame({"a": [1, 2]})),
            ("あ", DataFrame({"あ": [3, 4]})),
        ],
    )
    def test_filter_unicode(self, name, expected):
        """``like`` and ``regex`` select the same non-ASCII column label."""
        # GH13101
        df = DataFrame({"a": [1, 2], "あ": [3, 4]})

        tm.assert_frame_equal(df.filter(like=name), expected)
        tm.assert_frame_equal(df.filter(regex=name), expected)

    # Kept as a one-element parametrization so the ``name`` argument and the
    # test signature stay unchanged; the previous list repeated "a" twice.
    @pytest.mark.parametrize("name", ["a"])
    def test_filter_bytestring(self, name):
        """A ``str`` pattern is expected to select a ``bytes`` column label."""
        # GH13101
        df = DataFrame({b"a": [1, 2], b"b": [3, 4]})
        expected = DataFrame({b"a": [1, 2]})

        tm.assert_frame_equal(df.filter(like=name), expected)
        tm.assert_frame_equal(df.filter(regex=name), expected)

    def test_filter_corner(self):
        """Filtering an empty frame returns an equal empty frame."""
        empty = DataFrame()

        result = empty.filter([])
        tm.assert_frame_equal(result, empty)

        result = empty.filter(like="foo")
        tm.assert_frame_equal(result, empty)

    def test_filter_regex_non_string(self):
        """Non-string column labels are dropped by ``regex`` rather than raising."""
        # GH#5798 trying to filter on non-string columns should drop,
        # not raise
        df = DataFrame(np.random.random((3, 2)), columns=["STRING", 123])
        result = df.filter(regex="STRING")
        expected = df[["STRING"]]
        tm.assert_frame_equal(result, expected)
+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from pandas import DataFrame, Series
2+
import pandas._testing as tm
3+
4+
5+
class TestDataFramePop:
6+
def test_pop(self, float_frame):
7+
float_frame.columns.name = "baz"
8+
9+
float_frame.pop("A")
10+
assert "A" not in float_frame
11+
12+
float_frame["foo"] = "bar"
13+
float_frame.pop("foo")
14+
assert "foo" not in float_frame
15+
assert float_frame.columns.name == "baz"
16+
17+
# gh-10912: inplace ops cause caching issue
18+
a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"], index=["X", "Y"])
19+
b = a.pop("B")
20+
b += 1
21+
22+
# original frame
23+
expected = DataFrame([[1, 3], [4, 6]], columns=["A", "C"], index=["X", "Y"])
24+
tm.assert_frame_equal(a, expected)
25+
26+
# result
27+
expected = Series([2, 5], index=["X", "Y"], name="B") + 1
28+
tm.assert_series_equal(b, expected)
29+
30+
def test_pop_non_unique_cols(self):
31+
df = DataFrame({0: [0, 1], 1: [0, 1], 2: [4, 5]})
32+
df.columns = ["a", "b", "a"]
33+
34+
res = df.pop("a")
35+
assert type(res) == DataFrame
36+
assert len(res) == 2
37+
assert len(df.columns) == 1
38+
assert "b" in df.columns
39+
assert "a" not in df.columns
40+
assert len(df.index) == 2

0 commit comments

Comments
 (0)