Skip to content

Commit 98d4cc7

Browse files
Licht-T authored and TomAugspurger committed
BUG: Fix filter method so that it accepts byte and unicode column names (#18238)
(cherry picked from commit ec065b2)
1 parent dbf1f10 commit 98d4cc7

File tree

4 files changed

+45
-6
lines changed

4 files changed

+45
-6
lines changed

doc/source/whatsnew/v0.21.1.txt

+1 -1
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ Reshaping
116116

117117
- Error message in ``pd.merge_asof()`` for key datatype mismatch now includes datatype of left and right key (:issue:`18068`)
118118
- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`)
119-
-
119+
- Bug in ``DataFrame.filter(...)`` when :class:`unicode` is passed as a condition in Python 2 (:issue:`13101`)
120120
-
121121

122122
Numeric

pandas/compat/__init__.py

+18
Original file line number | Diff line number | Diff line change
@@ -257,6 +257,16 @@ def u(s):
257257
def u_safe(s):
258258
return s
259259

260+
def to_str(s):
261+
"""
262+
Convert bytes and non-string into Python 3 str
263+
"""
264+
if isinstance(s, binary_type):
265+
s = bytes_to_str(s)
266+
elif not isinstance(s, string_types):
267+
s = str(s)
268+
return s
269+
260270
def strlen(data, encoding=None):
261271
# encoding is for compat with PY2
262272
return len(data)
@@ -302,6 +312,14 @@ def u_safe(s):
302312
except:
303313
return s
304314

315+
def to_str(s):
316+
"""
317+
Convert unicode and non-string into Python 2 str
318+
"""
319+
if not isinstance(s, string_types):
320+
s = str(s)
321+
return s
322+
305323
def strlen(data, encoding=None):
306324
try:
307325
data = data.decode(encoding)

pandas/core/generic.py

+5 -5
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@
4949
from pandas.tseries.frequencies import to_offset
5050
from pandas import compat
5151
from pandas.compat.numpy import function as nv
52-
from pandas.compat import (map, zip, lzip, lrange, string_types,
52+
from pandas.compat import (map, zip, lzip, lrange, string_types, to_str,
5353
isidentifier, set_function_name, cPickle as pkl)
5454
from pandas.core.ops import _align_method_FRAME
5555
import pandas.core.nanops as nanops
@@ -3235,14 +3235,14 @@ def filter(self, items=None, like=None, regex=None, axis=None):
32353235
**{name: [r for r in items if r in labels]})
32363236
elif like:
32373237
def f(x):
3238-
if not isinstance(x, string_types):
3239-
x = str(x)
3240-
return like in x
3238+
return like in to_str(x)
32413239
values = labels.map(f)
32423240
return self.loc(axis=axis)[values]
32433241
elif regex:
3242+
def f(x):
3243+
return matcher.search(to_str(x)) is not None
32443244
matcher = re.compile(regex)
3245-
values = labels.map(lambda x: matcher.search(str(x)) is not None)
3245+
values = labels.map(f)
32463246
return self.loc(axis=axis)[values]
32473247
else:
32483248
raise TypeError('Must pass either `items`, `like`, or `regex`')

pandas/tests/frame/test_axis_select_reindex.py

+21
Original file line number | Diff line number | Diff line change
@@ -884,6 +884,27 @@ def test_filter_regex_search(self):
884884
exp = df[[x for x in df.columns if 'BB' in x]]
885885
assert_frame_equal(result, exp)
886886

887+
@pytest.mark.parametrize('name,expected', [
888+
('a', DataFrame({u'a': [1, 2]})),
889+
(u'a', DataFrame({u'a': [1, 2]})),
890+
(u'あ', DataFrame({u'あ': [3, 4]}))
891+
])
892+
def test_filter_unicode(self, name, expected):
893+
# GH13101
894+
df = DataFrame({u'a': [1, 2], u'あ': [3, 4]})
895+
896+
assert_frame_equal(df.filter(like=name), expected)
897+
assert_frame_equal(df.filter(regex=name), expected)
898+
899+
@pytest.mark.parametrize('name', ['a', u'a'])
900+
def test_filter_bytestring(self, name):
901+
# GH13101
902+
df = DataFrame({b'a': [1, 2], b'b': [3, 4]})
903+
expected = DataFrame({b'a': [1, 2]})
904+
905+
assert_frame_equal(df.filter(like=name), expected)
906+
assert_frame_equal(df.filter(regex=name), expected)
907+
887908
def test_filter_corner(self):
888909
empty = DataFrame()
889910

0 commit comments

Comments
 (0)