Skip to content

REF: DataFrame delitem, take, pop, filter tests #33109

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions pandas/tests/frame/indexing/test_delitem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import re

import numpy as np
import pytest

from pandas import DataFrame, MultiIndex


class TestDataFrameDelItem:
def test_delitem(self, float_frame):
del float_frame["A"]
assert "A" not in float_frame

def test_delitem_multiindex(self):
midx = MultiIndex.from_product([["A", "B"], [1, 2]])
df = DataFrame(np.random.randn(4, 4), columns=midx)
assert len(df.columns) == 4
assert ("A",) in df.columns
assert "A" in df.columns

result = df["A"]
assert isinstance(result, DataFrame)
del df["A"]

assert len(df.columns) == 2

# A still in the levels, BUT get a KeyError if trying
# to delete
assert ("A",) not in df.columns
with pytest.raises(KeyError, match=re.escape("('A',)")):
del df[("A",)]

# behavior of dropped/deleted MultiIndex levels changed from
# GH 2770 to GH 19027: MultiIndex no longer '.__contains__'
# levels which are dropped/deleted
assert "A" not in df.columns
with pytest.raises(KeyError, match=re.escape("('A',)")):
del df["A"]

def test_delitem_corner(self, float_frame):
f = float_frame.copy()
del f["D"]
assert len(f.columns) == 3
with pytest.raises(KeyError, match=r"^'D'$"):
del f["D"]
del f["B"]
assert len(f.columns) == 2

def test_delitem_col_still_multiindex(self):
arrays = [["a", "b", "c", "top"], ["", "", "", "OD"], ["", "", "", "wx"]]

tuples = sorted(zip(*arrays))
index = MultiIndex.from_tuples(tuples)

df = DataFrame(np.random.randn(3, 4), columns=index)
del df[("a", "", "")]
assert isinstance(df.columns, MultiIndex)
9 changes: 0 additions & 9 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,15 +854,6 @@ def test_getitem_empty_frame_with_boolean(self):
df2 = df[df > 0]
tm.assert_frame_equal(df, df2)

def test_delitem_corner(self, float_frame):
f = float_frame.copy()
del f["D"]
assert len(f.columns) == 3
with pytest.raises(KeyError, match=r"^'D'$"):
del f["D"]
del f["B"]
assert len(f.columns) == 2

def test_slice_floats(self):
index = [52195.504153, 52196.303147, 52198.369883]
df = DataFrame(np.random.rand(3, 2), index=index)
Expand Down
88 changes: 88 additions & 0 deletions pandas/tests/frame/indexing/test_take.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import pytest

import pandas._testing as tm


class TestDataFrameTake:
def test_take(self, float_frame):
# homogeneous
order = [3, 1, 2, 0]
for df in [float_frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
tm.assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.loc[:, ["D", "B", "C", "A"]]
tm.assert_frame_equal(result, expected, check_names=False)

# negative indices
order = [2, 1, -1]
for df in [float_frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
tm.assert_frame_equal(result, expected)

result = df.take(order, axis=0)
tm.assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.loc[:, ["C", "B", "D"]]
tm.assert_frame_equal(result, expected, check_names=False)

# illegal indices
msg = "indices are out-of-bounds"
with pytest.raises(IndexError, match=msg):
df.take([3, 1, 2, 30], axis=0)
with pytest.raises(IndexError, match=msg):
df.take([3, 1, 2, -31], axis=0)
with pytest.raises(IndexError, match=msg):
df.take([3, 1, 2, 5], axis=1)
with pytest.raises(IndexError, match=msg):
df.take([3, 1, 2, -5], axis=1)

def test_take_mixed_type(self, float_string_frame):

# mixed-dtype
order = [4, 1, 2, 0, 3]
for df in [float_string_frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
tm.assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.loc[:, ["foo", "B", "C", "A", "D"]]
tm.assert_frame_equal(result, expected)

# negative indices
order = [4, 1, -2]
for df in [float_string_frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
tm.assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.loc[:, ["foo", "B", "D"]]
tm.assert_frame_equal(result, expected)

def test_take_mixed_numeric(self, mixed_float_frame, mixed_int_frame):
# by dtype
order = [1, 2, 0, 3]
for df in [mixed_float_frame, mixed_int_frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
tm.assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.loc[:, ["B", "C", "A", "D"]]
tm.assert_frame_equal(result, expected)
139 changes: 139 additions & 0 deletions pandas/tests/frame/methods/test_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame
import pandas._testing as tm


class TestDataFrameFilter:
def test_filter(self, float_frame, float_string_frame):
# Items
filtered = float_frame.filter(["A", "B", "E"])
assert len(filtered.columns) == 2
assert "E" not in filtered

filtered = float_frame.filter(["A", "B", "E"], axis="columns")
assert len(filtered.columns) == 2
assert "E" not in filtered

# Other axis
idx = float_frame.index[0:4]
filtered = float_frame.filter(idx, axis="index")
expected = float_frame.reindex(index=idx)
tm.assert_frame_equal(filtered, expected)

# like
fcopy = float_frame.copy()
fcopy["AA"] = 1

filtered = fcopy.filter(like="A")
assert len(filtered.columns) == 2
assert "AA" in filtered

# like with ints in column names
df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"])
filtered = df.filter(like="_")
assert len(filtered.columns) == 2

# regex with ints in column names
# from PR #10384
df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"])
expected = DataFrame(
0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)
)
filtered = df.filter(regex="^[0-9]+$")
tm.assert_frame_equal(filtered, expected)

expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"])
# shouldn't remove anything
filtered = expected.filter(regex="^[0-9]+$")
tm.assert_frame_equal(filtered, expected)

# pass in None
with pytest.raises(TypeError, match="Must pass"):
float_frame.filter()
with pytest.raises(TypeError, match="Must pass"):
float_frame.filter(items=None)
with pytest.raises(TypeError, match="Must pass"):
float_frame.filter(axis=1)

# test mutually exclusive arguments
with pytest.raises(TypeError, match="mutually exclusive"):
float_frame.filter(items=["one", "three"], regex="e$", like="bbi")
with pytest.raises(TypeError, match="mutually exclusive"):
float_frame.filter(items=["one", "three"], regex="e$", axis=1)
with pytest.raises(TypeError, match="mutually exclusive"):
float_frame.filter(items=["one", "three"], regex="e$")
with pytest.raises(TypeError, match="mutually exclusive"):
float_frame.filter(items=["one", "three"], like="bbi", axis=0)
with pytest.raises(TypeError, match="mutually exclusive"):
float_frame.filter(items=["one", "three"], like="bbi")

# objects
filtered = float_string_frame.filter(like="foo")
assert "foo" in filtered

# unicode columns, won't ascii-encode
df = float_frame.rename(columns={"B": "\u2202"})
filtered = df.filter(like="C")
assert "C" in filtered

def test_filter_regex_search(self, float_frame):
fcopy = float_frame.copy()
fcopy["AA"] = 1

# regex
filtered = fcopy.filter(regex="[A]+")
assert len(filtered.columns) == 2
assert "AA" in filtered

# doesn't have to be at beginning
df = DataFrame(
{"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]}
)

result = df.filter(regex="BB")
exp = df[[x for x in df.columns if "BB" in x]]
tm.assert_frame_equal(result, exp)

@pytest.mark.parametrize(
"name,expected",
[
("a", DataFrame({"a": [1, 2]})),
("a", DataFrame({"a": [1, 2]})),
("あ", DataFrame({"あ": [3, 4]})),
],
)
def test_filter_unicode(self, name, expected):
# GH13101
df = DataFrame({"a": [1, 2], "あ": [3, 4]})

tm.assert_frame_equal(df.filter(like=name), expected)
tm.assert_frame_equal(df.filter(regex=name), expected)

@pytest.mark.parametrize("name", ["a", "a"])
def test_filter_bytestring(self, name):
# GH13101
df = DataFrame({b"a": [1, 2], b"b": [3, 4]})
expected = DataFrame({b"a": [1, 2]})

tm.assert_frame_equal(df.filter(like=name), expected)
tm.assert_frame_equal(df.filter(regex=name), expected)

def test_filter_corner(self):
empty = DataFrame()

result = empty.filter([])
tm.assert_frame_equal(result, empty)

result = empty.filter(like="foo")
tm.assert_frame_equal(result, empty)

def test_filter_regex_non_string(self):
# GH#5798 trying to filter on non-string columns should drop,
# not raise
df = pd.DataFrame(np.random.random((3, 2)), columns=["STRING", 123])
result = df.filter(regex="STRING")
expected = df[["STRING"]]
tm.assert_frame_equal(result, expected)
40 changes: 40 additions & 0 deletions pandas/tests/frame/methods/test_pop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from pandas import DataFrame, Series
import pandas._testing as tm


class TestDataFramePop:
def test_pop(self, float_frame):
float_frame.columns.name = "baz"

float_frame.pop("A")
assert "A" not in float_frame

float_frame["foo"] = "bar"
float_frame.pop("foo")
assert "foo" not in float_frame
assert float_frame.columns.name == "baz"

# gh-10912: inplace ops cause caching issue
a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"], index=["X", "Y"])
b = a.pop("B")
b += 1

# original frame
expected = DataFrame([[1, 3], [4, 6]], columns=["A", "C"], index=["X", "Y"])
tm.assert_frame_equal(a, expected)

# result
expected = Series([2, 5], index=["X", "Y"], name="B") + 1
tm.assert_series_equal(b, expected)

def test_pop_non_unique_cols(self):
df = DataFrame({0: [0, 1], 1: [0, 1], 2: [4, 5]})
df.columns = ["a", "b", "a"]

res = df.pop("a")
assert type(res) == DataFrame
assert len(res) == 2
assert len(df.columns) == 1
assert "b" in df.columns
assert "a" not in df.columns
assert len(df.index) == 2
Loading