BUG: best effort to hack around byte stream decoding issue described in #795

wesm · wesm · commit 892265568ac8 · 2012-02-17T19:34:39.000-05:00
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -383,7 +383,7 @@ def adjoin(space, *lists):
     Glues together two sets of strings using the amount of space requested.
     The idea is to prettify.
     """
-    outLines = []
+    out_lines = []
     newLists = []
     lengths = [max(map(len, x)) + space for x in lists[:-1]]
 
@@ -397,8 +397,16 @@ def adjoin(space, *lists):
         newLists.append(nl)
     toJoin = zip(*newLists)
     for lines in toJoin:
-        outLines.append(''.join(lines))
-    return '\n'.join(outLines)
+        out_lines.append(_join_unicode(lines))
+    return _join_unicode(out_lines, sep='\n')
+
+def _join_unicode(lines, sep=''):  # join strings with sep, tolerating mixed str/unicode items
+    try:
+        return sep.join(lines)  # first attempt: plain join
+    except UnicodeDecodeError:  # presumably unicode mixed with non-ASCII byte str (Python 2)
+        sep = unicode(sep)  # retry with a unicode separator
+        return sep.join([x.decode('utf-8') if isinstance(x, str) else x  # assumes byte strs are UTF-8
+                         for x in lines])  # NOTE: a decode failure here is not caught
 
 def iterpairs(seq):
     """
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -233,6 +233,17 @@ def test_to_string_float_formatting(self):
                         '1  2.512000e-01')
         assert(df_s == expected)
 
+    def test_to_string_ascii_error(self):  # repr of a frame mixing unicode and byte-str cells
+        data = [('0  ',
+                 u'                        .gitignore ',
+                 u'     5 ',
+                 ' \xe2\x80\xa2\xe2\x80\xa2\xe2\x80'  # non-ASCII byte str, split over two literals
+                 '\xa2\xe2\x80\xa2\xe2\x80\xa2')]  # together: UTF-8 bytes of five U+2022 bullets
+        df = DataFrame(data)
+
+        # smoke test: passes as long as repr(df) does not raise
+        repr(df)
+
     def test_to_string_int_formatting(self):
         df = DataFrame({'x' : [-15, 20, 25, -35]})
         self.assert_(issubclass(df['x'].dtype.type, np.integer))