diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 93c76bc80684f..875ba907823c6 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -537,6 +537,7 @@ Backwards incompatible API changes - the order of keyword arguments to text file parsing functions (``.read_csv()``, ``.read_table()``, ``.read_fwf()``) changed to group related arguments. (:issue:`11555`) - ``NaTType.isoformat`` now returns the string ``'NaT`` to allow the result to be passed to the constructor of ``Timestamp``. (:issue:`12300`) +- ``DataFrame.filter()`` now enforces mutual exclusion of the ``items``, ``like``, and ``regex`` keyword arguments. (:issue:`12399`) NaT and Timedelta operations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9ecaaebc2b523..95529512edd40 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2357,7 +2357,10 @@ def _reindex_axis(self, new_index, fill_method, axis, copy): def filter(self, items=None, like=None, regex=None, axis=None): """ - Restrict the info axis to set of items or wildcard + Subset rows or columns of dataframe according to labels in the index. + + Note that this routine does not filter a dataframe on its + contents. The filter is applied to the labels of the index. Parameters ---------- @@ -2367,19 +2370,57 @@ def filter(self, items=None, like=None, regex=None, axis=None): Keep info axis where "arg in col == True" regex : string (regular expression) Keep info axis with re.search(regex, col) == True - axis : int or None - The axis to filter on. By default this is the info axis. The "info - axis" is the axis that is used when indexing with ``[]``. For - example, ``df = DataFrame({'a': [1, 2, 3, 4]]}); df['a']``. So, - the ``DataFrame`` columns are the info axis. + axis : int or string axis name + The axis to filter on. By default this is the info axis. 
The "info + axis" is the axis that is used when indexing with ``[]``. + + Returns + ------- + same type as input object with filtered info axis + + Examples + -------- + >>> df + one two three + mouse 1 2 3 + rabbit 4 5 6 + + >>> # select columns by name + >>> df.filter(items=['one', 'three']) + one three + mouse 1 3 + rabbit 4 6 + + >>> # select columns by regular expression + >>> df.filter(regex='e$', axis=1) + one three + mouse 1 3 + rabbit 4 6 + + >>> # select rows containing 'bbi' + >>> df.filter(like='bbi', axis=0) + one two three + rabbit 4 5 6 + + See Also + -------- + pandas.DataFrame.select Notes ----- - Arguments are mutually exclusive, but this is not checked for + The ``items``, ``like``, and ``regex`` parameters are + enforced to be mutually exclusive. + ``axis`` defaults to the info axis that is used when indexing + with ``[]``. """ import re + nkw = sum([x is not None for x in [items, like, regex]]) + if nkw > 1: + raise TypeError('Keyword arguments `items`, `like`, or `regex` ' + 'are mutually exclusive') + if axis is None: axis = self._info_axis_name axis_name = self._get_axis_name(axis) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 07fe28f13b7d0..9da1b31d259c5 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -661,8 +661,24 @@ def test_filter(self): assert_frame_equal(filtered, expected) # pass in None + with assertRaisesRegexp(TypeError, 'Must pass'): + self.frame.filter() with assertRaisesRegexp(TypeError, 'Must pass'): self.frame.filter(items=None) + with assertRaisesRegexp(TypeError, 'Must pass'): + self.frame.filter(axis=1) + + # test mutually exclusive arguments + with assertRaisesRegexp(TypeError, 'mutually exclusive'): + self.frame.filter(items=['one', 'three'], regex='e$', like='bbi') + with assertRaisesRegexp(TypeError, 'mutually exclusive'): + self.frame.filter(items=['one', 'three'], regex='e$', axis=1) 
+ with assertRaisesRegexp(TypeError, 'mutually exclusive'): + self.frame.filter(items=['one', 'three'], regex='e$') + with assertRaisesRegexp(TypeError, 'mutually exclusive'): + self.frame.filter(items=['one', 'three'], like='bbi', axis=0) + with assertRaisesRegexp(TypeError, 'mutually exclusive'): + self.frame.filter(items=['one', 'three'], like='bbi') # objects filtered = self.mixed_frame.filter(like='foo')