Skip to content

Commit b2f287b

Browse files
jorisvandenbossche authored and jreback committed
Change Index repr to adjust to string length
Conflicts: pandas/tseries/base.py; use new format_data updates; Fix detection of good width; more fixes; Change [; Conflicts: pandas/core/index.py
1 parent c46bd61 commit b2f287b

File tree

6 files changed

+91
-86
lines changed

6 files changed

+91
-86
lines changed

doc/source/whatsnew/v0.16.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ Index Representation
255255
~~~~~~~~~~~~~~~~~~~~
256256

257257
The string representation of ``Index`` and its sub-classes has now been unified. ``Index, Int64Index, Float64Index, CategoricalIndex`` use a single-line display. The datetimelikes ``DatetimeIndex, PeriodIndex, TimedeltaIndex`` & ``MultiIndex`` will display in a multi-line format showing much more of the index values. The display width responds to the option ``display.max_seq_items``,
258-
which is now defaulted to 20 (previously was 100). (:issue:`6482`)
258+
which defaults to 100. (:issue:`6482`)
259259

260260
Previous Behavior
261261

pandas/core/config_init.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def mpl_style_cb(key):
269269
cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
270270
validator=is_one_of_factory([True, False, 'truncate']))
271271
cf.register_option('chop_threshold', None, pc_chop_threshold_doc)
272-
cf.register_option('max_seq_items', 20, pc_max_seq_items)
272+
cf.register_option('max_seq_items', 100, pc_max_seq_items)
273273
cf.register_option('mpl_style', None, pc_mpl_style_doc,
274274
validator=is_one_of_factory([None, False, 'default']),
275275
cb=mpl_style_cb)

pandas/core/index.py

+61-36
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pandas import compat
99
import numpy as np
1010

11+
from math import ceil
1112
from sys import getsizeof
1213
import pandas.tslib as tslib
1314
import pandas.lib as lib
@@ -405,8 +406,6 @@ def __unicode__(self):
405406
# no data provided, just attributes
406407
if data is None:
407408
data = ''
408-
else:
409-
data = "%s,%s" % (data, space)
410409

411410
res = u("%s(%s%s)") % (klass,
412411
data,
@@ -437,57 +436,83 @@ def _format_data(self):
437436
"""
438437
space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))
439438
space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
440-
sep = ',%s' % space1
439+
440+
sep = ','
441441
max_seq_items = get_option('display.max_seq_items')
442442
formatter = self._formatter_func
443+
444+
def best_len(values):
445+
return max([len(x) for x in values]) + 2
446+
447+
def best_rows(values, max_len):
448+
from pandas.core.format import get_console_size
449+
display_width, _ = get_console_size()
450+
if display_width is None:
451+
display_width = 80
452+
n_per_row = (display_width - len(self.__class__.__name__) - 2) // max_len
453+
n_rows = int(ceil(len(values) / float(n_per_row)))
454+
return n_per_row, n_rows
455+
456+
def best_fit(values, max_len, justify=False):
457+
458+
# number of rows to generate
459+
n_per_row, n_rows = best_rows(values, max_len)
460+
461+
# adjust all values to max length if we have multi-lines
462+
if n_rows > 1 or justify:
463+
values = [x.rjust(max_len) for x in values]
464+
sep_elements = sep
465+
else:
466+
sep_elements = sep + ' '
467+
468+
summary = ''
469+
for i in range(n_rows - 1):
470+
summary += sep.join(values[i*n_per_row:(i+1)*n_per_row])
471+
summary += sep
472+
summary += space1
473+
summary += sep_elements.join(values[(n_rows - 1)*n_per_row:n_rows*n_per_row])
474+
475+
return summary
476+
443477
n = len(self)
444478
if n == 0:
445-
summary = '[]'
479+
summary = '[], '
446480
elif n == 1:
447481
first = formatter(self[0])
448-
summary = '[%s]' % first
482+
summary = '[%s], ' % first
449483
elif n == 2:
450484
first = formatter(self[0])
451485
last = formatter(self[-1])
452-
summary = '[%s%s%s]' % (first, sep, last)
486+
summary = '[%s, %s], ' % (first, last)
453487
elif n > max_seq_items:
454488
n = min(max_seq_items//2,10)
455489

456-
head = sep.join([ formatter(x) for x in self[:n] ])
457-
tail = sep.join([ formatter(x) for x in self[-n:] ])
458-
summary = '[%s%s...%s%s]' % (head, space1, space1, tail)
459-
else:
460-
values = sep.join([ formatter(x) for x in self ])
461-
summary = '[%s]' % (values)
490+
head = [ formatter(x) for x in self[:n] ]
491+
tail = [ formatter(x) for x in self[-n:] ]
492+
max_len = max(best_len(head),best_len(tail))
462493

463-
return summary
494+
summary = '['
495+
summary += best_fit(head, max_len, justify=True)
496+
summary += space1 + ' ...' + space1
497+
summary += best_fit(tail, max_len, justify=True)
498+
summary += '],'
499+
summary += space2
464500

465-
def _format_data2(self):
466-
"""
467-
Return the formatted data as a unicode string
468-
"""
469-
max_seq_items = get_option('display.max_seq_items')
470-
formatter = self._formatter_func
471-
n = len(self)
472-
if n == 0:
473-
summary = '[]'
474-
elif n == 1:
475-
first = formatter(self[0])
476-
summary = '[%s]' % first
477-
elif n == 2:
478-
first = formatter(self[0])
479-
last = formatter(self[-1])
480-
summary = '[%s, %s]' % (first, last)
481-
elif n > max_seq_items:
482-
n = min(max_seq_items//2,5)
483-
head = ', '.join([ formatter(x) for x in self[:n] ])
484-
tail = ', '.join([ formatter(x) for x in self[-n:] ])
485-
summary = '[%s, ..., %s]' % (head, tail)
486501
else:
487-
summary = "[%s]" % ', '.join([ formatter(x) for x in self ])
502+
values = [ formatter(x) for x in self ]
488503

489-
return summary
504+
max_len = best_len(values)
505+
n_per_row, n_rows = best_rows(values, max_len)
490506

507+
summary = '['
508+
summary += best_fit(values, max_len)
509+
summary += '],'
510+
if n_rows > 1:
511+
summary += space2
512+
else:
513+
summary += ' '
514+
515+
return summary
491516

492517
def _format_attrs(self):
493518
"""

pandas/tests/test_format.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -3220,8 +3220,8 @@ def test_dates(self):
32203220

32213221
def test_mixed(self):
32223222
text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)]))
3223-
self.assertTrue("['2013-01-01 00:00:00'," in text)
3224-
self.assertTrue(", '2014-01-01 00:00:00']" in text)
3223+
self.assertTrue("'2013-01-01 00:00:00'," in text)
3224+
self.assertTrue("'2014-01-01 00:00:00']" in text)
32253225

32263226

32273227
class TestStringRepTimestamp(tm.TestCase):

pandas/tests/test_index.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2463,7 +2463,7 @@ def test_print_unicode_columns(self):
24632463
def test_repr_summary(self):
24642464
with cf.option_context('display.max_seq_items', 10):
24652465
r = repr(pd.Index(np.arange(1000)))
2466-
self.assertTrue(len(r) < 100)
2466+
self.assertTrue(len(r) < 150)
24672467
self.assertTrue("..." in r)
24682468

24692469
def test_repr_roundtrip(self):

pandas/tseries/tests/test_base.py

+25-45
Original file line numberDiff line numberDiff line change
@@ -123,26 +123,20 @@ def test_representation(self):
123123

124124
exp2 = """DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D', tz=None)"""
125125

126-
exp3 = """DatetimeIndex(['2011-01-01'
127-
'2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)"""
126+
exp3 = """DatetimeIndex(['2011-01-01', '2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)"""
128127

129-
exp4 = """DatetimeIndex(['2011-01-01',
130-
'2011-01-02',
131-
'2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)"""
128+
exp4 = """DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)"""
132129

133-
exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00',
134-
'2011-01-01 10:00:00+09:00',
135-
'2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')"""
130+
exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00', '2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')"""
136131

137-
exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00',
138-
'2011-01-01 10:00:00-05:00',
139-
'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')"""
132+
exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', 'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')"""
140133

141-
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
142-
[exp1, exp2, exp3, exp4, exp5, exp6]):
143-
for func in ['__repr__', '__unicode__', '__str__']:
144-
result = getattr(idx, func)()
145-
self.assertEqual(result, expected)
134+
with pd.option_context('display.width', 300):
135+
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
136+
[exp1, exp2, exp3, exp4, exp5, exp6]):
137+
for func in ['__repr__', '__unicode__', '__str__']:
138+
result = getattr(idx, func)()
139+
self.assertEqual(result, expected)
146140

147141
def test_summary(self):
148142
# GH9116
@@ -377,22 +371,18 @@ def test_representation(self):
377371

378372
exp2 = """TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')"""
379373

380-
exp3 = """TimedeltaIndex(['1 days'
381-
'2 days'], dtype='timedelta64[ns]', freq='D')"""
374+
exp3 = """TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')"""
382375

383-
exp4 = """TimedeltaIndex(['1 days',
384-
'2 days',
385-
'3 days'], dtype='timedelta64[ns]', freq='D')"""
376+
exp4 = """TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq='D')"""
386377

387-
exp5 = """TimedeltaIndex(['1 days 00:00:01',
388-
'2 days 00:00:00',
389-
'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"""
378+
exp5 = """TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', '3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"""
390379

391-
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
392-
[exp1, exp2, exp3, exp4, exp5]):
393-
for func in ['__repr__', '__unicode__', '__str__']:
394-
result = getattr(idx, func)()
395-
self.assertEqual(result, expected)
380+
with pd.option_context('display.width',300):
381+
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
382+
[exp1, exp2, exp3, exp4, exp5]):
383+
for func in ['__repr__', '__unicode__', '__str__']:
384+
result = getattr(idx, func)()
385+
self.assertEqual(result, expected)
396386

397387
def test_summary(self):
398388
# GH9116
@@ -846,29 +836,19 @@ def test_representation(self):
846836

847837
exp2 = """PeriodIndex(['2011-01-01'], dtype='int64', freq='D')"""
848838

849-
exp3 = """PeriodIndex(['2011-01-01'
850-
'2011-01-02'], dtype='int64', freq='D')"""
839+
exp3 = """PeriodIndex(['2011-01-01', '2011-01-02'], dtype='int64', freq='D')"""
851840

852-
exp4 = """PeriodIndex(['2011-01-01',
853-
'2011-01-02',
854-
'2011-01-03'], dtype='int64', freq='D')"""
841+
exp4 = """PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='int64', freq='D')"""
855842

856-
exp5 = """PeriodIndex(['2011',
857-
'2012',
858-
'2013'], dtype='int64', freq='A-DEC')"""
843+
exp5 = """PeriodIndex(['2011', '2012', '2013'], dtype='int64', freq='A-DEC')"""
859844

860-
exp6 = """PeriodIndex(['2011-01-01 09:00',
861-
'2012-02-01 10:00',
862-
'NaT'], dtype='int64', freq='H')"""
845+
exp6 = """PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], dtype='int64', freq='H')"""
863846

864847
exp7 = """PeriodIndex(['2013Q1'], dtype='int64', freq='Q-DEC')"""
865848

866-
exp8 = """PeriodIndex(['2013Q1'
867-
'2013Q2'], dtype='int64', freq='Q-DEC')"""
849+
exp8 = """PeriodIndex(['2013Q1', '2013Q2'], dtype='int64', freq='Q-DEC')"""
868850

869-
exp9 = """PeriodIndex(['2013Q1',
870-
'2013Q2',
871-
'2013Q3'], dtype='int64', freq='Q-DEC')"""
851+
exp9 = """PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], dtype='int64', freq='Q-DEC')"""
872852

873853
for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
874854
[exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]):

0 commit comments

Comments
 (0)