Skip to content

Commit b4ca18b

Browse files
committed
ENH: better console unicode handling, GH #680, #340
1 parent 4e165ed commit b4ca18b

File tree

6 files changed

+37
-21
lines changed

6 files changed

+37
-21
lines changed

Diff for: pandas/core/common.py

+10 -1
Original file line number | Diff line number | Diff line change
@@ -509,7 +509,7 @@ def _stringify(col):
509509
if isinstance(col, tuple):
510510
return str(col)
511511
else:
512-
return '%s' % col
512+
return '%s' % console_encode(col)
513513

514514
def _float_format_default(v, width=None):
515515
"""
@@ -814,3 +814,12 @@ def load(path):
814814
f.close()
815815

816816

817+
def console_encode(value):
818+
if not isinstance(value, unicode):
819+
return value
820+
821+
import sys
822+
if sys.stdin is not None and sys.stdin.encoding is not None:
823+
return value.encode(sys.stdin.encoding)
824+
else:
825+
return value.encode('ascii', 'replace')

Diff for: pandas/core/format.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -371,9 +371,9 @@ def is_numeric_dtype(dtype):
371371
dtypes = self.frame.dtypes
372372
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
373373
str_columns = [[' %s' % x
374-
if x not in formatters and need_leadsp[x]
374+
if col not in formatters and need_leadsp[x]
375375
else str(x)]
376-
for x in fmt_columns]
376+
for col, x in zip(self.columns, fmt_columns)]
377377

378378
if self.show_index_names and self.has_index_names:
379379
for x in str_columns:

Diff for: pandas/core/frame.py

+11 -16
Original file line number | Diff line number | Diff line change
@@ -440,29 +440,25 @@ def __repr__(self):
440440
else config.max_rows)
441441
max_columns = config.max_columns
442442

443+
buf = StringIO()
443444
if max_columns > 0:
444-
buf = StringIO()
445445
if len(self.index) < max_rows and \
446446
len(self.columns) <= max_columns:
447447
self.to_string(buf=buf)
448448
else:
449449
self.info(buf=buf, verbose=self._verbose_info)
450-
return buf.getvalue()
451450
else:
452451
if len(self.index) > max_rows:
453-
buf = StringIO()
454452
self.info(buf=buf, verbose=self._verbose_info)
455-
return buf.getvalue()
456453
else:
457-
buf = StringIO()
458454
self.to_string(buf=buf)
459455
value = buf.getvalue()
460-
if max([len(l) for l in value.split('\n')]) <= terminal_width:
461-
return value
462-
else:
456+
if max([len(l) for l in value.split('\n')]) > terminal_width:
463457
buf = StringIO()
464458
self.info(buf=buf, verbose=self._verbose_info)
465-
return buf.getvalue()
459+
value = buf.getvalue()
460+
return com.console_encode(value)
461+
return com.console_encode(buf.getvalue())
466462

467463
def __iter__(self):
468464
"""
@@ -1030,7 +1026,7 @@ def info(self, verbose=True, buf=None):
10301026
cols = self.columns
10311027

10321028
if verbose:
1033-
print >> buf, unicode('Data columns:')
1029+
print >> buf, 'Data columns:'
10341030
space = max([len(_stringify(k)) for k in self.columns]) + 4
10351031
col_counts = []
10361032
counts = self.count()
@@ -1039,18 +1035,17 @@ def info(self, verbose=True, buf=None):
10391035
colstr = _stringify(col)
10401036
col_counts.append('%s%d non-null values' %
10411037
(_put_str(colstr, space), count))
1042-
print >> buf, unicode('\n'.join(col_counts))
1038+
print >> buf, '\n'.join(col_counts)
10431039
else:
10441040
if len(cols) <= 2:
1045-
print >> buf, unicode('Columns: %s' % repr(cols))
1041+
print >> buf, 'Columns: %s' % repr(cols)
10461042
else:
1047-
print >> buf, unicode('Columns: %s to %s'
1048-
% (_stringify(cols[0]),
1049-
_stringify(cols[-1])))
1043+
print >> buf, ('Columns: %s to %s' % (_stringify(cols[0]),
1044+
_stringify(cols[-1])))
10501045

10511046
counts = self.get_dtype_counts()
10521047
dtypes = ['%s(%d)' % k for k in sorted(counts.iteritems())]
1053-
buf.write(u'dtypes: %s' % ', '.join(dtypes))
1048+
buf.write('dtypes: %s' % ', '.join(dtypes))
10541049

10551050
@property
10561051
def dtypes(self):

Diff for: pandas/core/series.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -559,7 +559,7 @@ def __repr__(self):
559559
else:
560560
result = '%s' % ndarray.__repr__(self)
561561

562-
return result
562+
return com.console_encode(result)
563563

564564
def _tidy_repr(self, max_vals=20):
565565
num = max_vals // 2

Diff for: pandas/tests/test_frame.py

+9 -1
Original file line number | Diff line number | Diff line change
@@ -1814,14 +1814,17 @@ def test_repr_tuples(self):
18141814
repr(df)
18151815
df.to_string(col_space=10, buf=buf)
18161816

1817-
def test_to_string_unicode(self):
1817+
def test_to_string_repr_unicode(self):
18181818
buf = StringIO()
18191819

18201820
unicode_values = [u'\u03c3'] * 10
18211821
unicode_values = np.array(unicode_values, dtype=object)
18221822
df = DataFrame({'unicode' : unicode_values})
18231823
df.to_string(col_space=10, buf=buf)
18241824

1825+
# it works!
1826+
repr(df)
1827+
18251828
def test_to_string_unicode_columns(self):
18261829
df = DataFrame({u'\u03c3' : np.arange(10.)})
18271830

@@ -1833,6 +1836,11 @@ def test_to_string_unicode_columns(self):
18331836
df.info(buf=buf)
18341837
buf.getvalue()
18351838

1839+
def test_to_string_with_formatters_unicode(self):
1840+
df = DataFrame({u'c/\u03c3':[1,2,3]})
1841+
result = df.to_string(formatters={u'c/\u03c3': lambda x: '%s' % x})
1842+
self.assertEqual(result, ' c/\xcf\x83\n0 1 \n1 2 \n2 3 ')
1843+
18361844
def test_head_tail(self):
18371845
assert_frame_equal(self.frame.head(), self.frame[:5])
18381846
assert_frame_equal(self.frame.tail(), self.frame[-5:])

Diff for: pandas/tests/test_series.py

+4
Original file line number | Diff line number | Diff line change
@@ -747,6 +747,10 @@ def test_repr(self):
747747
name=('foo', 'bar', 'baz'))
748748
repr(biggie)
749749

750+
def test_repr_unicode(self):
751+
s = Series([u'\u03c3'] * 10)
752+
repr(s)
753+
750754
def test_to_string(self):
751755
from cStringIO import StringIO
752756
buf = StringIO()

0 commit comments

Comments
 (0)