Skip to content

Commit 7918d60

Browse files
maxgrenderjones authored and jreback committed
Fix for GH 6885 - get_dummies chokes on unicode values
1 parent 006db75 commit 7918d60

File tree

3 files changed

+13
-2
lines changed

3 files changed

+13
-2
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,7 @@ Bug Fixes
451451
- Bug in ``DataFrame.plot`` draws unnecessary axes when enabling ``subplots`` and ``kind=scatter`` (:issue:`6951`)
452452
- Bug in ``read_csv`` from a filesystem with non-utf-8 encoding (:issue:`6807`)
453453
- Bug in ``iloc`` when setting / aligning (:issue:`6766`)
454+
- Bug causing ``UnicodeEncodeError`` when ``get_dummies`` is called with unicode values and a prefix (:issue:`6885`)
454455

455456
pandas 0.13.1
456457
-------------

pandas/core/reshape.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1019,7 +1019,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False):
10191019
dummy_mat[cat.labels == -1] = 0
10201020

10211021
if prefix is not None:
1022-
dummy_cols = ['%s%s%s' % (prefix, prefix_sep, str(v))
1022+
dummy_cols = ['%s%s%s' % (prefix, prefix_sep, v)
10231023
for v in levels]
10241024
else:
10251025
dummy_cols = levels

pandas/tests/test_reshape.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from pandas.core.reshape import (melt, convert_dummies, lreshape, get_dummies,
1919
wide_to_long)
2020
import pandas.util.testing as tm
21-
from pandas.compat import StringIO, cPickle, range
21+
from pandas.compat import StringIO, cPickle, range, u
2222

2323
_multiprocess_can_split_ = True
2424

@@ -199,6 +199,16 @@ def test_include_na(self):
199199
exp_just_na = DataFrame(Series(1.0,index=[0]),columns=[nan])
200200
assert_array_equal(res_just_na.values, exp_just_na.values)
201201

202+
def test_unicode(self): # See GH 6885 - get_dummies chokes on unicode values
203+
import unicodedata
204+
e = 'e'
205+
eacute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
206+
s = [e, eacute, eacute]
207+
res = get_dummies(s, prefix='letter')
208+
exp = DataFrame({'letter_e': {0: 1.0, 1: 0.0, 2: 0.0},
209+
u('letter_%s') % eacute: {0: 0.0, 1: 1.0, 2: 1.0}})
210+
assert_frame_equal(res, exp)
211+
202212
class TestConvertDummies(tm.TestCase):
203213
def test_convert_dummies(self):
204214
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',

0 commit comments

Comments
 (0)