formatting MultiIndex

jreback · jreback · commit d994bb768040 · 2015-05-04T08:46:38.000-04:00
diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
@@ -207,6 +207,51 @@ API changes
 
 - By default, ``read_csv`` and ``read_table`` will now try to infer the compression type based on the file extension. Set ``compression=None`` to restore the previous behavior (no decompression). (:issue:`9770`)
 
+.. _whatsnew_0161.index_repr:
+
+Index Representation
+~~~~~~~~~~~~~~~~~~~~
+
+The string representation of ``Index`` and its sub-classes has now been unified. These are all uniform in their output
+formats, except for ``MultiIndex``, which has a multi-line repr. The display width responds to the option ``display.max_seq_items``,
+which now defaults to 10 (previously 100). (:issue:`6482`)
+
+Previous Behavior
+
+.. code-block:: python
+
+
+   In [1]: pd.get_option('max_seq_items')
+   Out[1]: 100
+
+   In [2]: pd.Index(range(4),name='foo')
+   Out[2]: Int64Index([0, 1, 2, 3], dtype='int64')
+
+   In [3]: pd.Index(range(104),name='foo')
+   Out[3]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], dtype='int64')
+
+   In [4]: pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
+   Out[4]:
+   <class 'pandas.tseries.index.DatetimeIndex'>
+   [2013-01-01 00:00:00-05:00, ..., 2013-01-04 00:00:00-05:00]
+   Length: 4, Freq: D, Timezone: US/Eastern
+
+   In [5]: pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
+   Out[5]:
+   <class 'pandas.tseries.index.DatetimeIndex'>
+   [2013-01-01 00:00:00-05:00, ..., 2013-04-14 00:00:00-04:00]
+   Length: 104, Freq: D, Timezone: US/Eastern
+
+New Behavior
+
+.. ipython:: python
+
+   pd.get_option('max_seq_items')
+   pd.Index(range(4),name='foo')
+   pd.Index(range(104),name='foo')
+   pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern')
+   pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern')
+
 .. _whatsnew_0161.performance:
 
 Performance Improvements
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -269,7 +269,7 @@ def mpl_style_cb(key):
     cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
                        validator=is_one_of_factory([True, False, 'truncate']))
     cf.register_option('chop_threshold', None, pc_chop_threshold_doc)
-    cf.register_option('max_seq_items', 100, pc_max_seq_items)
+    cf.register_option('max_seq_items', 10, pc_max_seq_items)
     cf.register_option('mpl_style', None, pc_mpl_style_doc,
                        validator=is_one_of_factory([None, False, 'default']),
                        cb=mpl_style_cb)
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -395,21 +395,33 @@ def __unicode__(self):
         klass = self.__class__.__name__
         data = self._format_data()
         attrs = self._format_attrs()
-        max_seq_items = get_option('display.max_seq_items')
-        if len(self) > max_seq_items:
-            space = "\n%s" % (' ' * (len(klass) + 1))
-        else:
-            space = " "
+        space = self._format_space()
 
         prepr = (u(",%s") % space).join([u("%s=%s") % (k, v)
                                           for k, v in attrs])
-        res = u("%s(%s,%s%s)") % (klass,
-                                  data,
-                                  space,
-                                  prepr)
+
+        # no data provided, just attributes
+        if data is None:
+            data = ''
+        else:
+            data = "%s,%s" % (data, space)
+
+        res = u("%s(%s%s)") % (klass,
+                               data,
+                               prepr)
 
         return res
 
+    def _format_space(self):
+
+        # using space here controls if the attributes
+        # are line separated or not (the default)
+
+        #max_seq_items = get_option('display.max_seq_items')
+        #if len(self) > max_seq_items:
+        #    space = "\n%s" % (' ' * (len(klass) + 1))
+        return " "
+
     @property
     def _formatter_func(self):
         """
@@ -421,7 +433,6 @@ def _format_data(self):
         """
         Return the formatted data as a unicode string
         """
-
         max_seq_items = get_option('display.max_seq_items')
         formatter = self._formatter_func
         n = len(self)
@@ -450,9 +461,12 @@ def _format_attrs(self):
         Return a list of tuples of the (attr,formatted_value)
         """
         attrs = []
+        attrs.append(('dtype',"'%s'" % self.dtype))
         if self.name is not None:
             attrs.append(('name',default_pprint(self.name)))
-        attrs.append(('dtype',"'%s'" % self.dtype))
+        max_seq_items = get_option('display.max_seq_items')
+        if len(self) > max_seq_items:
+            attrs.append(('length',len(self)))
         return attrs
 
     def to_series(self, **kwargs):
@@ -3931,40 +3945,24 @@ def nbytes(self):
         names_nbytes = sum(( getsizeof(i) for i in self.names ))
         return level_nbytes + label_nbytes + names_nbytes
 
-    def __repr__(self):
-        encoding = get_option('display.encoding')
+    def _format_attrs(self):
+        """
+        Return a list of tuples of the (attr,formatted_value)
+        """
         attrs = [('levels', default_pprint(self.levels)),
                  ('labels', default_pprint(self.labels))]
         if not all(name is None for name in self.names):
             attrs.append(('names', default_pprint(self.names)))
         if self.sortorder is not None:
             attrs.append(('sortorder', default_pprint(self.sortorder)))
+        return attrs
 
-        space = ' ' * (len(self.__class__.__name__) + 1)
-        prepr = (u(",\n%s") % space).join([u("%s=%s") % (k, v)
-                                          for k, v in attrs])
-        res = u("%s(%s)") % (self.__class__.__name__, prepr)
-
-        if not compat.PY3:
-            # needs to be str in Python 2
-            res = res.encode(encoding)
-        return res
-
-    def __unicode__(self):
-        """
-        Return a string representation for a particular Index
+    def _format_space(self):
+        return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
 
-        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
-        py2/py3.
-        """
-        rows = self.format(names=True)
-        max_rows = get_option('display.max_rows')
-        if len(rows) > max_rows:
-            spaces = (len(rows[0]) - 3) // 2
-            centered = ' ' * spaces
-            half = max_rows // 2
-            rows = rows[:half] + [centered + '...' + centered] + rows[-half:]
-        return "\n".join(rows)
+    def _format_data(self):
+        # we are formatting thru the attributes
+        return None
 
     def __len__(self):
         return len(self.labels[0])
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -3215,13 +3215,13 @@ def test_date_explict_date_format(self):
 class TestDatetimeIndexUnicode(tm.TestCase):
     def test_dates(self):
         text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1)]))
-        self.assertTrue("[2013-01-01," in text)
-        self.assertTrue(", 2014-01-01]" in text)
+        self.assertTrue("['2013-01-01'," in text)
+        self.assertTrue(", '2014-01-01']" in text)
 
     def test_mixed(self):
         text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)]))
-        self.assertTrue("[2013-01-01 00:00:00," in text)
-        self.assertTrue(", 2014-01-01 00:00:00]" in text)
+        self.assertTrue("['2013-01-01 00:00:00'," in text)
+        self.assertTrue(", '2014-01-01 00:00:00']" in text)
 
 
 class TestStringRepTimestamp(tm.TestCase):
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
@@ -2480,16 +2480,13 @@ def test_slice_keep_name(self):
 
 class DatetimeLike(Base):
 
-    def test_repr_roundtrip(self):
-        raise nose.SkipTest("Short reprs are not supported repr for Datetimelike indexes")
-
     def test_str(self):
 
         # test the string repr
         idx = self.create_index()
         idx.name = 'foo'
-        self.assertTrue("length=%s" % len(idx) in str(idx))
-        self.assertTrue("u'foo'" in str(idx))
+        self.assertFalse("length=%s" % len(idx) in str(idx))
+        self.assertTrue("'foo'" in str(idx))
         self.assertTrue(idx.__class__.__name__ in str(idx))
 
         if hasattr(idx,'tz'):
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
@@ -255,17 +255,13 @@ def argmax(self, axis=None):
 
     @property
     def _formatter_func(self):
-        """
-        Format function to convert value to representation
-        """
-        return str
+        raise AbstractMethodError(self)
 
     def _format_attrs(self):
         """
         Return a list of tuples of the (attr,formatted_value)
         """
         attrs = super(DatetimeIndexOpsMixin, self)._format_attrs()
-        attrs.append(('length',len(self)))
         for attrib in self._attributes:
             if attrib == 'freq':
                 freq = self.freqstr
@@ -499,4 +495,6 @@ def summary(self, name=None):
         if self.freq:
             result += '\nFreq: %s' % self.freqstr
 
+        # display as values, not quoted
+        result = result.replace("'","")
         return result
diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
@@ -293,6 +293,10 @@ def _to_embed(self, keep_tz=False):
         """ return an array repr of this object, potentially casting to object """
         return self.asobject.values
 
+    @property
+    def _formatter_func(self):
+        return lambda x: "'%s'" % x
+
     def asof_locs(self, where, mask):
         """
         where : array of timestamps
diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py