Skip to content

Commit 50baa9a

Browse files
committed
CLN: parametrize test, codestyle update
1 parent 07975d6 commit 50baa9a

File tree

2 files changed

+31
-31
lines changed

2 files changed

+31
-31
lines changed

pandas/core/reshape/reshape.py

+15-13
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
# pylint: disable=E1101,E1103
22
# pylint: disable=W0703,W0622,W0613,W0201
3-
from pandas.compat import range, text_type, zip, u
3+
from pandas.compat import range, text_type, zip, u, PY2
44
from pandas import compat
55
from functools import partial
66
import itertools
@@ -923,19 +923,21 @@ def get_empty_Frame(data, sparse):
923923

924924
number_of_cols = len(levels)
925925

926-
py2_prefix_sep_is_unicode = isinstance(prefix_sep, text_type)
927-
if prefix is not None:
928-
py2_prefix_is_unicode = isinstance(prefix, text_type)
929-
dummy_cols = []
930-
for level in levels:
931-
fstr = '{prefix}{sep}{level}'
932-
if py2_prefix_sep_is_unicode or py2_prefix_is_unicode or \
933-
isinstance(level, text_type):
934-
fstr = u(fstr)
935-
dummy_cols.append(fstr.format(
936-
prefix=prefix, sep=prefix_sep, level=level))
937-
else:
926+
if prefix is None:
938927
dummy_cols = levels
928+
else:
929+
def _make_col_name(prefix, prefix_sep, level):
930+
fstr = '{prefix}{prefix_sep}{level}'
931+
if PY2 and (isinstance(prefix, text_type) or
932+
isinstance(prefix_sep, text_type) or
933+
isinstance(level, text_type)):
934+
fstr = u(fstr)
935+
return fstr.format(prefix=prefix,
936+
prefix_sep=prefix_sep,
937+
level=level)
938+
939+
dummy_cols = [_make_col_name(prefix, prefix_sep, level)
940+
for level in levels]
939941

940942
if isinstance(data, Series):
941943
index = data.index

pandas/tests/reshape/test_reshape.py

+16-18
Original file line number | Diff line number | Diff line change
@@ -302,24 +302,22 @@ def test_dataframe_dummies_with_categorical(self, df, sparse, dtype):
302302
expected.sort_index(axis=1)
303303
assert_frame_equal(result, expected)
304304

305-
def test_dataframe_dummies_unicode(self):
306-
df = pd.DataFrame(({u'ä': ['a']}))
307-
result = get_dummies(df)
308-
expected = pd.DataFrame({u'ä_a': [1]}, dtype=np.uint8)
309-
assert_frame_equal(result, expected)
310-
311-
df = pd.DataFrame({'x': [u'ä']})
312-
result = pd.get_dummies(df)
313-
expected = pd.DataFrame({u'x_ä': [1]}, dtype=np.uint8)
314-
assert_frame_equal(result, expected)
315-
316-
df = pd.DataFrame({'x': ['a']})
317-
result = pd.get_dummies(df, prefix=u'ä')
318-
expected = pd.DataFrame({u'ä_a': [1]}, dtype=np.uint8)
319-
assert_frame_equal(result, expected)
320-
321-
result = pd.get_dummies(df, prefix_sep=u'ä')
322-
expected = pd.DataFrame({u'xäa': [1]}, dtype=np.uint8)
305+
@pytest.mark.parametrize('get_dummies_kwargs,expected', [
306+
({'data': pd.DataFrame(({u'ä': ['a']}))},
307+
pd.DataFrame({u'ä_a': [1]}, dtype=np.uint8)),
308+
309+
({'data': pd.DataFrame({'x': [u'ä']})},
310+
pd.DataFrame({u'x_ä': [1]}, dtype=np.uint8)),
311+
312+
({'data': pd.DataFrame({'x': [u'a']}), 'prefix':u'ä'},
313+
pd.DataFrame({u'ä_a': [1]}, dtype=np.uint8)),
314+
315+
({'data': pd.DataFrame({'x': [u'a']}), 'prefix_sep':u'ä'},
316+
pd.DataFrame({u'xäa': [1]}, dtype=np.uint8))])
317+
def test_dataframe_dummies_unicode(self, get_dummies_kwargs, expected):
318+
# GH22084 pd.get_dummies incorrectly encodes unicode characters
319+
# in dataframe column names
320+
result = get_dummies(**get_dummies_kwargs)
323321
assert_frame_equal(result, expected)
324322

325323
def test_basic_drop_first(self, sparse):

0 commit comments

Comments (0)