Skip to content

Commit bf80c0a

Browse files
committed
Merge remote branch 'y-p/unicode_'
* y-p/unicode_: BUG: use pprint_thing() rather than str() in Block.repr() BUG: use pprint_thing() rather than str() BUG: nested exceptions clobber the exception context, must reraise with named arg TST: nested exceptions clobber the exception context, must reraise with named arg BUG: MultiIndex repr should properly encode unicode labels TST: MultiIndex repr should properly encode unicode labels BUG: df.ix[:,unicode] should not die with UnicodeEncodeError TST: df.ix[:,unicode] should not die with UnicodeEncodeError BUG: pprint_thing() should not realize lazy things BUG: printing df.columns should not raise exception when labels are unicode TST: printing df.columns should not raise exception when labels are unicode
2 parents 02b6f79 + acfa4ab commit bf80c0a

File tree

8 files changed

+51
-10
lines changed

8 files changed

+51
-10
lines changed

pandas/core/common.py

+3
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,9 @@ def pprint_thing(thing, _nest_lvl=0):
11471147
from pandas.core.format import print_config
11481148
if thing is None:
11491149
result = ''
1150+
elif (py3compat.PY3 and hasattr(thing,'__next__')) or \
1151+
hasattr(thing,'next'):
1152+
return unicode(thing)
11501153
elif (isinstance(thing, dict) and
11511154
_nest_lvl < print_config.pprint_nest_depth):
11521155
result = _pprint_dict(thing, _nest_lvl)

pandas/core/format.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,8 @@ def to_string(self, force_unicode=False):
268268
if len(frame.columns) == 0 or len(frame.index) == 0:
269269
info_line = (u'Empty %s\nColumns: %s\nIndex: %s'
270270
% (type(self.frame).__name__,
271-
frame.columns, frame.index))
271+
com.pprint_thing(frame.columns),
272+
com.pprint_thing(frame.index)))
272273
text = info_line
273274
else:
274275
strcols = self._to_str_columns(force_unicode)

pandas/core/frame.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -3913,11 +3913,12 @@ def _apply_standard(self, func, axis, ignore_failures=False):
39133913
try:
39143914
if hasattr(e, 'args'):
39153915
k = res_index[i]
3916-
e.args = e.args + ('occurred at index %s' % str(k),)
3916+
e.args = e.args + ('occurred at index %s' %
3917+
com.pprint_thing(k),)
39173918
except (NameError, UnboundLocalError): # pragma: no cover
39183919
# no k defined yet
39193920
pass
3920-
raise
3921+
raise e
39213922

39223923
if len(results) > 0 and _is_sequence(results[0]):
39233924
if not isinstance(results[0], Series):

pandas/core/index.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,10 @@ def __str__(self):
209209
try:
210210
return np.array_repr(self.values)
211211
except UnicodeError:
212-
converted = u','.join(unicode(x) for x in self.values)
213-
return u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted,
212+
converted = u','.join(com.pprint_thing(x) for x in self.values)
213+
result = u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted,
214214
str(self.values.dtype))
215+
return com.console_encode(result)
215216

216217
def _mpl_repr(self):
217218
# how to represent ourselves to matplotlib
@@ -1320,11 +1321,15 @@ def __repr__(self):
13201321
self[-50:].values])
13211322
else:
13221323
values = self.values
1323-
summary = np.array2string(values, max_line_width=70)
1324+
1325+
summary = com.pprint_thing(values)
13241326

13251327
np.set_printoptions(threshold=options['threshold'])
13261328

1327-
return output % summary
1329+
if py3compat.PY3:
1330+
return output % summary
1331+
else:
1332+
return com.console_encode(output % summary)
13281333

13291334
def __len__(self):
13301335
return len(self.labels[0])

pandas/core/internals.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,10 @@ def set_ref_items(self, ref_items, maybe_rename=True):
4949
self.ref_items = ref_items
5050

5151
def __repr__(self):
52-
shape = ' x '.join([str(s) for s in self.shape])
52+
shape = ' x '.join([com.pprint_thing(s) for s in self.shape])
5353
name = type(self).__name__
54-
return '%s: %s, %s, dtype %s' % (name, self.items, shape, self.dtype)
54+
result = '%s: %s, %s, dtype %s' % (name, self.items, shape, self.dtype)
55+
return com.console_encode(result) # repr must return byte-string
5556

5657
def __contains__(self, item):
5758
return item in self.items
@@ -935,7 +936,7 @@ def _find_block(self, item):
935936

936937
def _check_have(self, item):
937938
if item not in self.items:
938-
raise KeyError('no item named %s' % str(item))
939+
raise KeyError('no item named %s' % com.pprint_thing(item))
939940

940941
def reindex_axis(self, new_axis, method=None, axis=0, copy=True):
941942
new_axis = _ensure_index(new_axis)

pandas/tests/test_frame.py

+15
Original file line numberDiff line numberDiff line change
@@ -1305,6 +1305,21 @@ def test_iget_value(self):
13051305
expected = self.frame.get_value(row, col)
13061306
assert_almost_equal(result, expected)
13071307

1308+
def test_nested_exception(self):
1309+
# Ignore the strange way of triggering the problem
1310+
# (which may get fixed), it's just a way to trigger
1311+
# the issue of reraising an outer exception without
1312+
# a named argument
1313+
df=DataFrame({"a":[1,2,3],"b":[4,5,6],"c":[7,8,9]}).set_index(["a","b"])
1314+
l=list(df.index)
1315+
l[0]=["a","b"]
1316+
df.index=l
1317+
1318+
try:
1319+
print df
1320+
except Exception,e:
1321+
self.assertNotEqual(type(e),UnboundLocalError)
1322+
13081323
_seriesd = tm.getSeriesData()
13091324
_tsd = tm.getTimeSeriesData()
13101325

pandas/tests/test_index.py

+8
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,10 @@ def test_int_name_format(self):
847847
repr(s)
848848
repr(df)
849849

850+
def test_print_unicode_columns(self):
851+
df=pd.DataFrame({u"\u05d0":[1,2,3],"\u05d1":[4,5,6],"c":[7,8,9]})
852+
print(df.columns) # should not raise UnicodeDecodeError
853+
850854
class TestMultiIndex(unittest.TestCase):
851855

852856
def setUp(self):
@@ -1671,6 +1675,10 @@ def test_tolist(self):
16711675
exp = list(self.index.values)
16721676
self.assertEqual(result, exp)
16731677

1678+
def test_repr_with_unicode_data(self):
1679+
d={"a":[u"\u05d0",2,3],"b":[4,5,6],"c":[7,8,9]}
1680+
index=pd.DataFrame(d).set_index(["a","b"]).index
1681+
self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped
16741682

16751683
def test_get_combined_index():
16761684
from pandas.core.index import _get_combined_index

pandas/tests/test_internals.py

+7
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,13 @@ def test_get_numeric_data(self):
408408

409409
self.assertEqual(rs.ix[0, 'bool'], not df.ix[0, 'bool'])
410410

411+
def test_missing_unicode_key(self):
412+
df=DataFrame({"a":[1]})
413+
try:
414+
df.ix[:,u"\u05d0"] # should not raise UnicodeEncodeError
415+
except KeyError:
416+
pass # this is the expected exception
417+
411418
if __name__ == '__main__':
412419
# unittest.main()
413420
import nose

0 commit comments

Comments
 (0)