diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 7c5dc66ce4587..86dcc9dcefa09 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -115,7 +115,7 @@ Reshaping - Error message in ``pd.merge_asof()`` for key datatype mismatch now includes datatype of left and right key (:issue:`18068`) - Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`) -- +- Bug in ``DataFrame.filter(...)`` when :class:`unicode` is passed as a condition in Python 2 (:issue:`13101`) - Numeric diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 8a6a979ddd7c3..a615e098135a9 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -257,6 +257,16 @@ def u(s): def u_safe(s): return s + def to_str(s): + """ + Convert bytes and non-string into Python 3 str + """ + if isinstance(s, binary_type): + s = bytes_to_str(s) + elif not isinstance(s, string_types): + s = str(s) + return s + def strlen(data, encoding=None): # encoding is for compat with PY2 return len(data) @@ -302,6 +312,14 @@ def u_safe(s): except: return s + def to_str(s): + """ + Convert unicode and non-string into Python 2 str + """ + if not isinstance(s, string_types): + s = str(s) + return s + def strlen(data, encoding=None): try: data = data.decode(encoding) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d93fe52d5ca9c..0a10058677fb9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -50,7 +50,7 @@ from pandas.tseries.frequencies import to_offset from pandas import compat from pandas.compat.numpy import function as nv -from pandas.compat import (map, zip, lzip, lrange, string_types, +from pandas.compat import (map, zip, lzip, lrange, string_types, to_str, isidentifier, set_function_name, cPickle as pkl) from pandas.core.ops import _align_method_FRAME import pandas.core.nanops as nanops @@ -3218,14 +3218,14 @@ def filter(self, items=None, like=None, regex=None, axis=None): **{name: [r for r in items if r in labels]}) elif like: def f(x): - if not isinstance(x, string_types): - x = str(x) - return like in x + return like in to_str(x) values = labels.map(f) return self.loc(axis=axis)[values] elif regex: + def f(x): + return matcher.search(to_str(x)) is not None matcher = re.compile(regex) - values = labels.map(lambda x: matcher.search(str(x)) is not None) + values = labels.map(f) return self.loc(axis=axis)[values] else: raise TypeError('Must pass either `items`, `like`, or `regex`') diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 1e2f630401c89..343e235fb741c 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -884,6 +884,27 @@ def test_filter_regex_search(self): exp = df[[x for x in df.columns if 'BB' in x]] assert_frame_equal(result, exp) + @pytest.mark.parametrize('name,expected', [ + ('a', DataFrame({u'a': [1, 2]})), + (u'a', DataFrame({u'a': [1, 2]})), + (u'あ', DataFrame({u'あ': [3, 4]})) + ]) + def test_filter_unicode(self, name, expected): + # GH13101 + df = DataFrame({u'a': [1, 2], u'あ': [3, 4]}) + + assert_frame_equal(df.filter(like=name), expected) + assert_frame_equal(df.filter(regex=name), expected) + + @pytest.mark.parametrize('name', ['a', u'a']) + def test_filter_bytestring(self, name): + # GH13101 + df = DataFrame({b'a': [1, 2], b'b': [3, 4]}) + expected = DataFrame({b'a': [1, 2]}) + + assert_frame_equal(df.filter(like=name), expected) + assert_frame_equal(df.filter(regex=name), expected) + def test_filter_corner(self): empty = DataFrame()