Change Index repr to adjust to string length

jorisvandenbossche · jreback · commit b2f287b4efbe · 2015-05-08T14:26:04.000-04:00
Conflicts:
	pandas/tseries/base.py

use new format_data

updates

Fix detection of good width

more fixes

Change [

Conflicts:
	pandas/core/index.py
diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
@@ -255,7 +255,7 @@ Index Representation
 ~~~~~~~~~~~~~~~~~~~~
 
 The string representation of ``Index`` and its sub-classes have now been unified. ``Index, Int64Index, Float64Index, CategoricalIndex`` are single-line display. The datetimelikes ``DatetimeIndex, PeriodIndex, TimedeltaIndex`` & ``MultiIndex`` will display in a multi-line format showing much more of the index values. The display width responds to the option ``display.max_seq_items``,
-which is now defaulted to 20 (previously was 100). (:issue:`6482`)
+which defaults to 100. (:issue:`6482`)
 
 Previous Behavior
 
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -269,7 +269,7 @@ def mpl_style_cb(key):
     cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
                        validator=is_one_of_factory([True, False, 'truncate']))
     cf.register_option('chop_threshold', None, pc_chop_threshold_doc)
-    cf.register_option('max_seq_items', 20, pc_max_seq_items)
+    cf.register_option('max_seq_items', 100, pc_max_seq_items)
     cf.register_option('mpl_style', None, pc_mpl_style_doc,
                        validator=is_one_of_factory([None, False, 'default']),
                        cb=mpl_style_cb)
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -8,6 +8,7 @@
 from pandas import compat
 import numpy as np
 
+from math import ceil
 from sys import getsizeof
 import pandas.tslib as tslib
 import pandas.lib as lib
@@ -405,8 +406,6 @@ def __unicode__(self):
         # no data provided, just attributes
         if data is None:
             data = ''
-        else:
-            data = "%s,%s" % (data, space)
 
         res = u("%s(%s%s)") % (klass,
                                data,
@@ -437,57 +436,83 @@ def _format_data(self):
         """
         space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))
         space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
-        sep = ',%s' % space1
+
+        sep = ','
         max_seq_items = get_option('display.max_seq_items')
         formatter = self._formatter_func
+
+        def best_len(values):
+            return max([len(x) for x in values]) + 2
+
+        def best_rows(values, max_len):
+            from pandas.core.format import get_console_size
+            display_width, _ = get_console_size()
+            if display_width is None:
+                display_width = 80
+            n_per_row = (display_width - len(self.__class__.__name__) - 2) // max_len
+            n_rows = int(ceil(len(values) / float(n_per_row)))
+            return n_per_row, n_rows
+
+        def best_fit(values, max_len, justify=False):
+
+            # number of rows to generate
+            n_per_row, n_rows = best_rows(values, max_len)
+
+            # adjust all values to max length if we have multi-lines
+            if n_rows > 1 or justify:
+                values = [x.rjust(max_len) for x in values]
+                sep_elements = sep
+            else:
+                sep_elements = sep + ' '
+
+            summary = ''
+            for i in range(n_rows - 1):
+                summary += sep.join(values[i*n_per_row:(i+1)*n_per_row])
+                summary += sep
+                summary += space1
+            summary += sep_elements.join(values[(n_rows - 1)*n_per_row:n_rows*n_per_row])
+
+            return summary
+
         n = len(self)
         if n == 0:
-            summary = '[]'
+            summary = '[], '
         elif n == 1:
             first = formatter(self[0])
-            summary = '[%s]' % first
+            summary = '[%s], ' % first
         elif n == 2:
             first = formatter(self[0])
             last = formatter(self[-1])
-            summary = '[%s%s%s]' % (first, sep, last)
+            summary = '[%s, %s], ' % (first, last)
         elif n > max_seq_items:
             n = min(max_seq_items//2,10)
 
-            head = sep.join([ formatter(x) for x in self[:n] ])
-            tail = sep.join([ formatter(x) for x in self[-n:] ])
-            summary = '[%s%s...%s%s]' % (head, space1, space1, tail)
-        else:
-            values = sep.join([ formatter(x) for x in self ])
-            summary = '[%s]' % (values)
+            head = [ formatter(x) for x in self[:n] ]
+            tail = [ formatter(x) for x in self[-n:] ]
+            max_len = max(best_len(head),best_len(tail))
 
-        return summary
+            summary = '['
+            summary += best_fit(head, max_len, justify=True)
+            summary += space1 + ' ...' + space1
+            summary += best_fit(tail, max_len, justify=True)
+            summary += '],'
+            summary += space2
 
-    def _format_data2(self):
-        """
-        Return the formatted data as a unicode string
-        """
-        max_seq_items = get_option('display.max_seq_items')
-        formatter = self._formatter_func
-        n = len(self)
-        if n == 0:
-            summary = '[]'
-        elif n == 1:
-            first = formatter(self[0])
-            summary = '[%s]' % first
-        elif n == 2:
-            first = formatter(self[0])
-            last = formatter(self[-1])
-            summary = '[%s, %s]' % (first, last)
-        elif n > max_seq_items:
-            n = min(max_seq_items//2,5)
-            head = ', '.join([ formatter(x) for x in self[:n] ])
-            tail = ', '.join([ formatter(x) for x in self[-n:] ])
-            summary = '[%s, ..., %s]' % (head, tail)
         else:
-            summary = "[%s]" % ', '.join([ formatter(x) for x in self ])
+            values = [ formatter(x) for x in self ]
 
-        return summary
+            max_len = best_len(values)
+            n_per_row, n_rows = best_rows(values, max_len)
 
+            summary = '['
+            summary += best_fit(values, max_len)
+            summary += '],'
+            if n_rows > 1:
+                summary += space2
+            else:
+                summary += ' '
+
+        return summary
 
     def _format_attrs(self):
         """
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -3220,8 +3220,8 @@ def test_dates(self):
 
     def test_mixed(self):
         text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)]))
-        self.assertTrue("['2013-01-01 00:00:00'," in text)
-        self.assertTrue(", '2014-01-01 00:00:00']" in text)
+        self.assertTrue("'2013-01-01 00:00:00'," in text)
+        self.assertTrue("'2014-01-01 00:00:00']" in text)
 
 
 class TestStringRepTimestamp(tm.TestCase):
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
@@ -2463,7 +2463,7 @@ def test_print_unicode_columns(self):
     def test_repr_summary(self):
         with cf.option_context('display.max_seq_items', 10):
             r = repr(pd.Index(np.arange(1000)))
-            self.assertTrue(len(r) < 100)
+            self.assertTrue(len(r) < 150)
             self.assertTrue("..." in r)
 
     def test_repr_roundtrip(self):
diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py
@@ -123,26 +123,20 @@ def test_representation(self):
 
         exp2 = """DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D', tz=None)"""
 
-        exp3 = """DatetimeIndex(['2011-01-01'
-               '2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)"""
+        exp3 = """DatetimeIndex(['2011-01-01', '2011-01-02'], dtype='datetime64[ns]', freq='D', tz=None)"""
 
-        exp4 = """DatetimeIndex(['2011-01-01',
-               '2011-01-02',
-               '2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)"""
+        exp4 = """DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='datetime64[ns]', freq='D', tz=None)"""
 
-        exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00',
-               '2011-01-01 10:00:00+09:00',
-               '2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')"""
+        exp5 = """DatetimeIndex(['2011-01-01 09:00:00+09:00', '2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00'], dtype='datetime64[ns]', freq='H', tz='Asia/Tokyo')"""
 
-        exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00',
-               '2011-01-01 10:00:00-05:00',
-               'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')"""
+        exp6 = """DatetimeIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', 'NaT'], dtype='datetime64[ns]', freq=None, tz='US/Eastern')"""
 
-        for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
-                                 [exp1, exp2, exp3, exp4, exp5, exp6]):
-            for func in ['__repr__', '__unicode__', '__str__']:
-                result = getattr(idx, func)()
-                self.assertEqual(result, expected)
+        with pd.option_context('display.width', 300):
+            for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6],
+                                     [exp1, exp2, exp3, exp4, exp5, exp6]):
+                for func in ['__repr__', '__unicode__', '__str__']:
+                    result = getattr(idx, func)()
+                    self.assertEqual(result, expected)
 
     def test_summary(self):
         # GH9116
@@ -377,22 +371,18 @@ def test_representation(self):
 
         exp2 = """TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')"""
 
-        exp3 = """TimedeltaIndex(['1 days'
-                '2 days'], dtype='timedelta64[ns]', freq='D')"""
+        exp3 = """TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')"""
 
-        exp4 = """TimedeltaIndex(['1 days',
-                '2 days',
-                '3 days'], dtype='timedelta64[ns]', freq='D')"""
+        exp4 = """TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq='D')"""
 
-        exp5 = """TimedeltaIndex(['1 days 00:00:01',
-                '2 days 00:00:00',
-                '3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"""
+        exp5 = """TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', '3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"""
 
-        for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
-                                 [exp1, exp2, exp3, exp4, exp5]):
-            for func in ['__repr__', '__unicode__', '__str__']:
-                result = getattr(idx, func)()
-                self.assertEqual(result, expected)
+        with pd.option_context('display.width',300):
+            for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
+                                     [exp1, exp2, exp3, exp4, exp5]):
+                for func in ['__repr__', '__unicode__', '__str__']:
+                    result = getattr(idx, func)()
+                    self.assertEqual(result, expected)
 
     def test_summary(self):
         # GH9116
@@ -846,29 +836,19 @@ def test_representation(self):
 
         exp2 = """PeriodIndex(['2011-01-01'], dtype='int64', freq='D')"""
 
-        exp3 = """PeriodIndex(['2011-01-01'
-             '2011-01-02'], dtype='int64', freq='D')"""
+        exp3 = """PeriodIndex(['2011-01-01', '2011-01-02'], dtype='int64', freq='D')"""
 
-        exp4 = """PeriodIndex(['2011-01-01',
-             '2011-01-02',
-             '2011-01-03'], dtype='int64', freq='D')"""
+        exp4 = """PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], dtype='int64', freq='D')"""
 
-        exp5 = """PeriodIndex(['2011',
-             '2012',
-             '2013'], dtype='int64', freq='A-DEC')"""
+        exp5 = """PeriodIndex(['2011', '2012', '2013'], dtype='int64', freq='A-DEC')"""
 
-        exp6 = """PeriodIndex(['2011-01-01 09:00',
-             '2012-02-01 10:00',
-             'NaT'], dtype='int64', freq='H')"""
+        exp6 = """PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], dtype='int64', freq='H')"""
 
         exp7 = """PeriodIndex(['2013Q1'], dtype='int64', freq='Q-DEC')"""
 
-        exp8 = """PeriodIndex(['2013Q1'
-             '2013Q2'], dtype='int64', freq='Q-DEC')"""
+        exp8 = """PeriodIndex(['2013Q1', '2013Q2'], dtype='int64', freq='Q-DEC')"""
 
-        exp9 = """PeriodIndex(['2013Q1',
-             '2013Q2',
-             '2013Q3'], dtype='int64', freq='Q-DEC')"""
+        exp9 = """PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], dtype='int64', freq='Q-DEC')"""
 
         for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
                                  [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9]):