Skip to content

Commit 8922655

Browse files
committed
BUG: best effort to hack around byte stream decoding issue described in #795
1 parent 1b777aa commit 8922655

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

pandas/core/common.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ def adjoin(space, *lists):
383383
Glues together two sets of strings using the amount of space requested.
384384
The idea is to prettify.
385385
"""
386-
outLines = []
386+
out_lines = []
387387
newLists = []
388388
lengths = [max(map(len, x)) + space for x in lists[:-1]]
389389

@@ -397,8 +397,16 @@ def adjoin(space, *lists):
397397
newLists.append(nl)
398398
toJoin = zip(*newLists)
399399
for lines in toJoin:
400-
outLines.append(''.join(lines))
401-
return '\n'.join(outLines)
400+
out_lines.append(_join_unicode(lines))
401+
return _join_unicode(out_lines, sep='\n')
402+
403+
def _join_unicode(lines, sep=''):
404+
try:
405+
return sep.join(lines)
406+
except UnicodeDecodeError:
407+
sep = unicode(sep)
408+
return sep.join([x.decode('utf-8') if isinstance(x, str) else x
409+
for x in lines])
402410

403411
def iterpairs(seq):
404412
"""

pandas/tests/test_format.py

+11
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,17 @@ def test_to_string_float_formatting(self):
233233
'1 2.512000e-01')
234234
assert(df_s == expected)
235235

236+
def test_to_string_ascii_error(self):
    # Regression smoke test: a single row whose cells mix plain strings,
    # unicode strings, and a byte string holding non-ASCII escape sequences.
    row = ('0 ',
           u' .gitignore ',
           u' 5 ',
           ' \xe2\x80\xa2\xe2\x80\xa2\xe2\x80'
           '\xa2\xe2\x80\xa2\xe2\x80\xa2')
    frame = DataFrame([row])

    # No assertion on the output: the test passes as long as building the
    # repr does not raise.
    repr(frame)
246+
236247
def test_to_string_int_formatting(self):
237248
df = DataFrame({'x' : [-15, 20, 25, -35]})
238249
self.assert_(issubclass(df['x'].dtype.type, np.integer))

0 commit comments

Comments
 (0)