Skip to content

Commit 617e231

Browse files
committed
Deprecate NDFrame.filter
1 parent 844dc4a commit 617e231

File tree

3 files changed

+16
-131
lines changed

3 files changed

+16
-131
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -581,6 +581,7 @@ Deprecations
581581
it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`).
582582
- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
583583
- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
584+
- :meth:`DataFrame.filter` and :meth:`Series.filter` are deprecated and will be removed in a future version; use ``.loc`` instead (:issue:`26642`)
584585

585586
.. _whatsnew_1000.prior_deprecations:
586587

pandas/core/generic.py

+12-1
Original file line number | Diff line number | Diff line change
@@ -4557,7 +4557,11 @@ def filter(
45574557
"""
45584558
Subset the dataframe rows or columns according to the specified index labels.
45594559
4560-
Note that this routine does not filter a dataframe on its
4560+
.. deprecated:: 1.0
4561+
Use .loc instead, e.g. for regular expressions
4562+
use a boolean mask: df.loc[:, df.columns.str.contains("^col_")]
4563+
4564+
Note that this method does not filter a dataframe on its
45614565
contents. The filter is applied to the labels of the index.
45624566
45634567
Parameters
@@ -4612,6 +4616,13 @@ def filter(
46124616
one two three
46134617
rabbit 4 5 6
46144618
"""
4619+
warnings.warn(
4620+
"DataFrame/Series.filter is deprecated "
4621+
"and will be removed in a future version",
4622+
FutureWarning,
4623+
stacklevel=2,
4624+
)
4625+
46154626
nkw = com.count_not_none(items, like, regex)
46164627
if nkw > 1:
46174628
raise TypeError(

pandas/tests/frame/test_axis_select_reindex.py

+3-130
Original file line number | Diff line number | Diff line change
@@ -806,136 +806,9 @@ def test_align_series_combinations(self):
806806
tm.assert_series_equal(res1, exp2)
807807
tm.assert_frame_equal(res2, exp1)
808808

809-
def test_filter(self, float_frame, float_string_frame):
810-
# Items
811-
filtered = float_frame.filter(["A", "B", "E"])
812-
assert len(filtered.columns) == 2
813-
assert "E" not in filtered
814-
815-
filtered = float_frame.filter(["A", "B", "E"], axis="columns")
816-
assert len(filtered.columns) == 2
817-
assert "E" not in filtered
818-
819-
# Other axis
820-
idx = float_frame.index[0:4]
821-
filtered = float_frame.filter(idx, axis="index")
822-
expected = float_frame.reindex(index=idx)
823-
tm.assert_frame_equal(filtered, expected)
824-
825-
# like
826-
fcopy = float_frame.copy()
827-
fcopy["AA"] = 1
828-
829-
filtered = fcopy.filter(like="A")
830-
assert len(filtered.columns) == 2
831-
assert "AA" in filtered
832-
833-
# like with ints in column names
834-
df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"])
835-
filtered = df.filter(like="_")
836-
assert len(filtered.columns) == 2
837-
838-
# regex with ints in column names
839-
# from PR #10384
840-
df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"])
841-
expected = DataFrame(
842-
0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)
843-
)
844-
filtered = df.filter(regex="^[0-9]+$")
845-
tm.assert_frame_equal(filtered, expected)
846-
847-
expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"])
848-
# shouldn't remove anything
849-
filtered = expected.filter(regex="^[0-9]+$")
850-
tm.assert_frame_equal(filtered, expected)
851-
852-
# pass in None
853-
with pytest.raises(TypeError, match="Must pass"):
854-
float_frame.filter()
855-
with pytest.raises(TypeError, match="Must pass"):
856-
float_frame.filter(items=None)
857-
with pytest.raises(TypeError, match="Must pass"):
858-
float_frame.filter(axis=1)
859-
860-
# test mutually exclusive arguments
861-
with pytest.raises(TypeError, match="mutually exclusive"):
862-
float_frame.filter(items=["one", "three"], regex="e$", like="bbi")
863-
with pytest.raises(TypeError, match="mutually exclusive"):
864-
float_frame.filter(items=["one", "three"], regex="e$", axis=1)
865-
with pytest.raises(TypeError, match="mutually exclusive"):
866-
float_frame.filter(items=["one", "three"], regex="e$")
867-
with pytest.raises(TypeError, match="mutually exclusive"):
868-
float_frame.filter(items=["one", "three"], like="bbi", axis=0)
869-
with pytest.raises(TypeError, match="mutually exclusive"):
870-
float_frame.filter(items=["one", "three"], like="bbi")
871-
872-
# objects
873-
filtered = float_string_frame.filter(like="foo")
874-
assert "foo" in filtered
875-
876-
# unicode columns, won't ascii-encode
877-
df = float_frame.rename(columns={"B": "\u2202"})
878-
filtered = df.filter(like="C")
879-
assert "C" in filtered
880-
881-
def test_filter_regex_search(self, float_frame):
882-
fcopy = float_frame.copy()
883-
fcopy["AA"] = 1
884-
885-
# regex
886-
filtered = fcopy.filter(regex="[A]+")
887-
assert len(filtered.columns) == 2
888-
assert "AA" in filtered
889-
890-
# doesn't have to be at beginning
891-
df = DataFrame(
892-
{"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]}
893-
)
894-
895-
result = df.filter(regex="BB")
896-
exp = df[[x for x in df.columns if "BB" in x]]
897-
tm.assert_frame_equal(result, exp)
898-
899-
@pytest.mark.parametrize(
900-
"name,expected",
901-
[
902-
("a", DataFrame({"a": [1, 2]})),
903-
("a", DataFrame({"a": [1, 2]})),
904-
("あ", DataFrame({"あ": [3, 4]})),
905-
],
906-
)
907-
def test_filter_unicode(self, name, expected):
908-
# GH13101
909-
df = DataFrame({"a": [1, 2], "あ": [3, 4]})
910-
911-
tm.assert_frame_equal(df.filter(like=name), expected)
912-
tm.assert_frame_equal(df.filter(regex=name), expected)
913-
914-
@pytest.mark.parametrize("name", ["a", "a"])
915-
def test_filter_bytestring(self, name):
916-
# GH13101
917-
df = DataFrame({b"a": [1, 2], b"b": [3, 4]})
918-
expected = DataFrame({b"a": [1, 2]})
919-
920-
tm.assert_frame_equal(df.filter(like=name), expected)
921-
tm.assert_frame_equal(df.filter(regex=name), expected)
922-
923-
def test_filter_corner(self):
924-
empty = DataFrame()
925-
926-
result = empty.filter([])
927-
tm.assert_frame_equal(result, empty)
928-
929-
result = empty.filter(like="foo")
930-
tm.assert_frame_equal(result, empty)
931-
932-
def test_filter_regex_non_string(self):
933-
# GH#5798 trying to filter on non-string columns should drop,
934-
# not raise
935-
df = pd.DataFrame(np.random.random((3, 2)), columns=["STRING", 123])
936-
result = df.filter(regex="STRING")
937-
expected = df[["STRING"]]
938-
tm.assert_frame_equal(result, expected)
809+
def test_filter_deprecated(self, float_frame):
810+
with tm.assert_produces_warning(FutureWarning):
811+
float_frame.filter(["A", "B", "E"])
939812

940813
def test_take(self, float_frame):
941814
# homogeneous

0 commit comments

Comments
 (0)