ENH: better console unicode handling, GH #680, #340

wesm · wesm · commit b4ca18bd605e · 2012-01-24T22:08:27.000-05:00
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -509,7 +509,7 @@ def _stringify(col):
     if isinstance(col, tuple):
         return str(col)
     else:
-        return '%s' % col
+        return '%s' % console_encode(col)
 
 def _float_format_default(v, width=None):
     """
@@ -814,3 +814,12 @@ def load(path):
         f.close()
 
 
+def console_encode(value):
+    """Return `value` encoded for the console; non-unicode passes through."""
+    if not isinstance(value, unicode):
+        return value
+
+    import sys
+    # 'replace' on every path, so an unencodable character never raises
+    encoding = getattr(sys.stdin, 'encoding', None) or 'ascii'
+    return value.encode(encoding, 'replace')
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -371,9 +371,9 @@ def is_numeric_dtype(dtype):
             dtypes = self.frame.dtypes
             need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
             str_columns = [[' %s' % x
-                            if x not in formatters and need_leadsp[x]
+                            if col not in formatters and need_leadsp[x]
                             else str(x)]
-                           for x in fmt_columns]
+                           for col, x in zip(self.columns, fmt_columns)]
 
         if self.show_index_names and self.has_index_names:
             for x in str_columns:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -440,29 +440,25 @@ def __repr__(self):
                     else config.max_rows)
         max_columns = config.max_columns
 
+        buf = StringIO()
         if max_columns > 0:
-            buf = StringIO()
             if len(self.index) < max_rows and \
                     len(self.columns) <= max_columns:
                 self.to_string(buf=buf)
             else:
                 self.info(buf=buf, verbose=self._verbose_info)
-            return buf.getvalue()
         else:
             if len(self.index) > max_rows:
-                buf = StringIO()
                 self.info(buf=buf, verbose=self._verbose_info)
-                return buf.getvalue()
             else:
-                buf = StringIO()
                 self.to_string(buf=buf)
                 value = buf.getvalue()
-                if max([len(l) for l in value.split('\n')]) <= terminal_width:
-                    return value
-                else:
+                if max([len(l) for l in value.split('\n')]) > terminal_width:
                     buf = StringIO()
                     self.info(buf=buf, verbose=self._verbose_info)
-                    return buf.getvalue()
+                    value = buf.getvalue()
+                return com.console_encode(value)
+        return com.console_encode(buf.getvalue())
 
     def __iter__(self):
         """
@@ -1030,7 +1026,7 @@ def info(self, verbose=True, buf=None):
         cols = self.columns
 
         if verbose:
-            print >> buf, unicode('Data columns:')
+            print >> buf, 'Data columns:'
             space = max([len(_stringify(k)) for k in self.columns]) + 4
             col_counts = []
             counts = self.count()
@@ -1039,18 +1035,17 @@ def info(self, verbose=True, buf=None):
                 colstr = _stringify(col)
                 col_counts.append('%s%d  non-null values' %
                                   (_put_str(colstr, space), count))
-            print >> buf, unicode('\n'.join(col_counts))
+            print >> buf, '\n'.join(col_counts)
         else:
             if len(cols) <= 2:
-                print >> buf, unicode('Columns: %s' % repr(cols))
+                print >> buf, 'Columns: %s' % repr(cols)
             else:
-                print >> buf, unicode('Columns: %s to %s'
-                                      % (_stringify(cols[0]),
-                                         _stringify(cols[-1])))
+                print >> buf, ('Columns: %s to %s' % (_stringify(cols[0]),
+                                                      _stringify(cols[-1])))
 
         counts = self.get_dtype_counts()
         dtypes = ['%s(%d)' % k for k in sorted(counts.iteritems())]
-        buf.write(u'dtypes: %s' % ', '.join(dtypes))
+        buf.write('dtypes: %s' % ', '.join(dtypes))
 
     @property
     def dtypes(self):
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -559,7 +559,7 @@ def __repr__(self):
         else:
             result = '%s' % ndarray.__repr__(self)
 
-        return result
+        return com.console_encode(result)
 
     def _tidy_repr(self, max_vals=20):
         num = max_vals // 2
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -1814,14 +1814,17 @@ def test_repr_tuples(self):
         repr(df)
         df.to_string(col_space=10, buf=buf)
 
-    def test_to_string_unicode(self):
+    def test_to_string_repr_unicode(self):
         buf = StringIO()
 
         unicode_values = [u'\u03c3'] * 10
         unicode_values = np.array(unicode_values, dtype=object)
         df = DataFrame({'unicode' : unicode_values})
         df.to_string(col_space=10, buf=buf)
 
+        # it works!
+        repr(df)
+
     def test_to_string_unicode_columns(self):
         df = DataFrame({u'\u03c3' : np.arange(10.)})
 
@@ -1833,6 +1836,11 @@ def test_to_string_unicode_columns(self):
         df.info(buf=buf)
         buf.getvalue()
 
+    def test_to_string_with_formatters_unicode(self):
+        fmt = {u'c/\u03c3': lambda x: '%s' % x}
+        result = DataFrame({u'c/\u03c3': [1, 2, 3]}).to_string(formatters=fmt)
+        self.assertEqual(result, '  c/\xcf\x83\n0 1   \n1 2   \n2 3   ')
+
     def test_head_tail(self):
         assert_frame_equal(self.frame.head(), self.frame[:5])
         assert_frame_equal(self.frame.tail(), self.frame[-5:])
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -747,6 +747,10 @@ def test_repr(self):
                         name=('foo', 'bar', 'baz'))
         repr(biggie)
 
+    def test_repr_unicode(self):
+        # it works! repr of non-ASCII unicode values must not raise
+        repr(Series([u'\u03c3'] * 10))
+
     def test_to_string(self):
         from cStringIO import StringIO
         buf = StringIO()